Example #1
    def get_follower_groups(self, user, update_cache=False):
        '''
        Gets the active and inactive follower groups together with their
        feed max length
        '''
        from feedly.feeds.love_feed import INACTIVE_USER_MAX_LENGTH, ACTIVE_USER_MAX_LENGTH

        active_follower_ids = self.get_active_follower_ids(
            user, update_cache=update_cache)
        inactive_follower_ids = self.get_inactive_follower_ids(
            user, update_cache=update_cache)

        follower_ids = active_follower_ids + inactive_follower_ids

        active_follower_groups = list(
            chunks(active_follower_ids, self.FANOUT_CHUNK_SIZE))
        active_follower_groups = [
            (follower_group, ACTIVE_USER_MAX_LENGTH)
            for follower_group in active_follower_groups]

        inactive_follower_groups = list(
            chunks(inactive_follower_ids, self.FANOUT_CHUNK_SIZE))
        inactive_follower_groups = [
            (follower_group, INACTIVE_USER_MAX_LENGTH)
            for follower_group in inactive_follower_groups]

        follower_groups = active_follower_groups + inactive_follower_groups

        logger.info('divided %s fanouts into %s tasks',
                    len(follower_ids), len(follower_groups))
        return follower_groups
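
All of the examples on this page revolve around a `chunks` helper that splits an iterable into fixed-size groups. Its source isn't shown here; below is a minimal sketch consistent with the tests at the bottom of the page (which expect lazily produced tuples):

    import itertools

    def chunks(iterable, chunk_size=10000):
        # lazily yield tuples of at most chunk_size items, e.g.
        # list(chunks(range(6), 2)) == [(0, 1), (2, 3), (4, 5)]
        iterator = iter(iterable)
        while True:
            chunk = tuple(itertools.islice(iterator, chunk_size))
            if not chunk:
                break
            yield chunk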
Example #2
File: base.py Project: okoye/Feedly
    def _start_fanout(self, feed_classes, user_id, operation, follower_ids=None, *args, **kwargs):
        '''
        Start fanout applies the given operation to the feeds of the user's
        followers

        It takes the follower ids and distributes them into smaller tasks
        of at most fanout_chunk_size users each

        :param feed_classes: the feed classes to run the operation on
        :param user_id: the user id to run the operation for
        :param operation: the operation function applied to all follower feeds
        :param follower_ids: (optionally) specify the list of followers
        :param args: args passed to the operation
        :param kwargs: kwargs passed to the operation
        '''
        user_ids = follower_ids or self.get_user_follower_ids(user_id=user_id)
        user_ids_chunks = list(chunks(user_ids, self.fanout_chunk_size))
        msg_format = 'spawning %s subtasks for %s user ids in chunks of %s users'
        logger.info(msg_format, len(user_ids_chunks),
                    len(user_ids), self.fanout_chunk_size)

        # now actually create the tasks
        subs = []
        for ids_chunk in user_ids_chunks:
            for name, feed_class in feed_classes.items():
                feed_class_dict = dict()
                feed_class_dict[name] = feed_class
                task_args = [
                    self, feed_class_dict, ids_chunk, operation] + list(args)
                sub = fanout_operation.apply_async(
                    args=task_args,
                    kwargs=kwargs
                )
                subs.append(sub)
        return subs
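
To make the chunking arithmetic concrete: a hypothetical run with 2,500 follower ids and a fanout_chunk_size of 1,000 spawns three subtasks per feed class (assuming the `chunks` sketch above):

    follower_ids = list(range(2500))  # hypothetical follower ids
    user_ids_chunks = list(chunks(follower_ids, 1000))
    assert [len(c) for c in user_ids_chunks] == [1000, 1000, 500]
    # with two feed classes this would yield 3 * 2 = 6 fanout subtasks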
Example #3
File: base.py Project: thinkbox/Feedly
    def create_fanout_tasks(self, follower_ids, feed_class, operation, operation_kwargs=None, fanout_priority=None):
        '''
        Creates the fanout tasks that run the given operation on the feeds
        of the given followers

        It takes the follower ids and distributes them into smaller tasks
        of at most fanout_chunk_size users each

        :param follower_ids: specify the list of followers
        :param feed_class: the feed class to run the operation on
        :param operation: the operation function applied to all follower feeds
        :param operation_kwargs: kwargs passed to the operation
        :param fanout_priority: the priority set to this fanout
        '''
        fanout_task = self.get_fanout_task(
            fanout_priority, feed_class=feed_class)
        if not fanout_task:
            return []
        chunk_size = self.fanout_chunk_size
        user_ids_chunks = list(chunks(follower_ids, chunk_size))
        msg_format = 'spawning %s subtasks for %s user ids in chunks of %s users'
        logger.info(
            msg_format, len(user_ids_chunks), len(follower_ids), chunk_size)
        tasks = []
        # now actually create the tasks
        for ids_chunk in user_ids_chunks:
            task = fanout_task.delay(
                feed_manager=self,
                feed_class=feed_class,
                user_ids=ids_chunk,
                operation=operation,
                operation_kwargs=operation_kwargs
            )
            tasks.append(task)
        return tasks
Example #4
File: base.py Project: cchongXD/Feedly
    def batch_import(self, user_id, activities, fanout=True, chunk_size=500):
        '''
        Batch import all of the user's activities and distribute
        them to the user's followers

        **Example**::

            activities = [long list of activities]
            feedly.batch_import(13, activities, chunk_size=500)

        :param user_id: the user who created the activities
        :param activities: a list of activities from this user
        :param fanout: whether or not to run the fanout
        :param chunk_size: how many activities to process per batch operation

        '''
        activities = list(activities)
        # skip empty lists
        if not activities:
            return
        logger.info('running batch import for user %s', user_id)

        # lookup the follower ids if we need them later
        follower_ids = []
        if fanout:
            follower_ids = self.get_user_follower_ids(user_id=user_id)
            logger.info('retrieved %s follower ids', len(follower_ids))

        user_feed = self.get_user_feed(user_id)
        if activities[0].actor_id != user_id:
            raise ValueError('Send activities for only one user please')

        activity_chunks = list(chunks(activities, chunk_size))
        logger.info('processing %s items in %s chunks of %s',
                    len(activities), len(activity_chunks), chunk_size)

        for index, activity_chunk in enumerate(activity_chunks):
            # first insert into the global activity storage
            self.user_feed_class.insert_activities(activity_chunk)
            logger.info(
                'inserted chunk %s (length %s) into the global activity store', index, len(activity_chunk))
            # next add the activities to the users personal timeline
            user_feed.add_many(activity_chunk, trim=False)
            logger.info(
                'inserted chunk %s (length %s) into the user feed', index, len(activity_chunk))
            # now start a big fanout task
            if fanout:
                logger.info('starting task fanout for chunk %s', index)

                # create the fanout tasks
                operation_kwargs = dict(activities=activity_chunk, trim=False)
                for feed_class in self.feed_classes.values():
                    self.create_fanout_tasks(
                        follower_ids,
                        feed_class,
                        add_operation,
                        operation_kwargs=operation_kwargs
                    )
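
The `add_operation` callable isn't shown on this page; a hedged sketch of what it plausibly looks like, inferred purely from how it is invoked above (a follower feed plus `activities` and `trim` keyword arguments; the `batch_interface` parameter is an assumption), would be:

    def add_operation(feed, activities, trim=True, batch_interface=None):
        # hedged sketch: apply one activity chunk to a single follower feed;
        # batch_import passes trim=False because trimming during a bulk
        # import is slow (see the comment in Example #7)
        feed.add_many(activities, batch_interface=batch_interface, trim=trim)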
Example #5
        def _add_many(redis, score_value_pairs):
            # note: key and results are closure variables from the enclosing
            # scope; this snippet only shows the inner helper
            score_value_list = sum(map(list, score_value_pairs), [])
            score_value_chunks = chunks(score_value_list, 200)

            for score_value_chunk in score_value_chunks:
                # legacy redis-py zadd signature: a flat
                # score, member, score, member, ... argument list
                result = redis.zadd(key, *score_value_chunk)
                logger.debug('adding to %s with score_value_chunk %s',
                             key, score_value_chunk)
                results.append(result)
            return results
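
The `sum(map(list, ...), [])` idiom flattens the (score, value) pairs into the alternating flat argument list that the legacy `zadd` call above expects; for example:

    score_value_pairs = [(1.0, 'activity:1'), (2.0, 'activity:2')]
    flat = sum(map(list, score_value_pairs), [])
    assert flat == [1.0, 'activity:1', 2.0, 'activity:2']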
Example #6
 def add_to_storage(self, key, activities, batch_interface=None, *args, **kwargs):
     '''
     Insert multiple columns using
     client.insert or batch_interface.insert
     '''
     # reuse the caller's batch if one was passed in, otherwise create our own
     batch = batch_interface or BatchQuery()
     # activities is a dict; itervalues() is the Python 2 spelling
     activity_chunks = chunks(activities.itervalues(), 50)
     for activity_chunk in activity_chunks:
         for model_instance in activity_chunk:
             model_instance.feed_id = str(key)
             model_instance.batch(batch).save()
         # only execute the batch when we created it ourselves; a
         # caller-supplied batch_interface is executed by the caller
         if batch_interface is None:
             batch.execute()
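
For callers that want several writes to share one Cassandra batch, cqlengine's `BatchQuery` can also be used as a context manager that executes on exit; a hedged sketch (the `storage`, `key` and `activities` names here are hypothetical):

    from cqlengine import BatchQuery

    with BatchQuery() as b:
        storage.add_to_storage(key, activities, batch_interface=b)
        # ...more writes can join the same batch here...
    # the shared batch executes once, when the with-block exits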
Example #7
File: base.py Project: loyning/Feedly
    def batch_import(self, user_id, activities, chunk_size=500):
        '''
        Batch import all of the user's activities and distribute
        them to the user's followers

        **Example**::

            activities = [long list of activities]
            feedly.batch_import(13, activities, 500)

        :param user_id: the user who created the activities
        :param activities: a list of activities from this user
        :param chunk_size: how many activities to process per batch operation

        '''
        activities = list(activities)
        # skip empty lists
        if not activities:
            return
        logger.info('running batch import for user %s', user_id)
        follower_ids = self.get_user_follower_ids(user_id=user_id)
        logger.info('retrieved %s follower ids', len(follower_ids))
        user_feed = self.get_user_feed(user_id)
        if activities[0].actor_id != user_id:
            raise ValueError('Send activities for only one user please')

        activity_chunks = list(chunks(activities, chunk_size))
        logger.info('processing %s items in %s chunks of %s', len(activities),
                    len(activity_chunks), chunk_size)

        for index, activity_chunk in enumerate(activity_chunks):
            # first insert into the global activity storage
            self.user_feed_class.insert_activities(activity_chunk)
            logger.info(
                'inserted chunk %s (length %s) into the global activity store',
                index, len(activity_chunk))
            # next add the activities to the users personal timeline
            user_feed.add_many(activity_chunk)
            logger.info('inserted chunk %s (length %s) into the user feed',
                        index, len(activity_chunk))
            # now start a big fanout task
            logger.info('starting task fanout for chunk %s', index)
            self._start_fanout(
                self.feed_classes,
                user_id,
                add_operation,
                follower_ids=follower_ids,
                activities=activity_chunk,
                # disable trimming during the import as it's really, really slow
                trim=False)
Example #8
File: tests.py Project: mahdiyar/Feedly
    def test_remove_love(self):
        from entity.models import Love
        thessa = User.objects.get(pk=13)
        profile = thessa.get_profile()
        follower_ids = profile.cached_follower_ids()[:100]
        love = Love.objects.all()[:1][0]
        connection = get_redis_connection()

        # divide the followers in groups of 10000
        follower_groups = chunks(follower_ids, 10000)
        for follower_group in follower_groups:
            # now, for these 10000 items pipeline/thread away
            with connection.map() as redis:
                activity = love.create_activity()
                for follower_id in follower_group:
                    feed = LoveFeed(follower_id, redis=redis)
                    feed.remove(activity)
Example #9
File: tests.py Project: shaj3/Feedly
    def test_add_love(self):
        from entity.models import Love
        thessa = User.objects.get(pk=13)
        profile = thessa.get_profile()
        follower_ids = profile.cached_follower_ids()[:100]
        love = Love.objects.all()[:1][0]
        connection = get_redis_connection()

        # divide the followers in groups of 10000
        follower_groups = chunks(follower_ids, 10000)
        for follower_group in follower_groups:
            # now, for these 10000 items pipeline/thread away
            with connection.map() as redis:
                activity = Activity(love.user, LoveVerb, love, love.user, time=love.created_at, extra_context=dict(hello='world'))
                for follower_id in follower_group:
                    feed = LoveFeed(follower_id, redis=redis)
                    feed.add(activity)
Example #10
 def _fanout(self, user, operation, *args, **kwargs):
     '''
     Generic functionality for running an operation on all of your
     followers' feeds

     It takes the follower ids and distributes them in chunks of
     FANOUT_CHUNK_SIZE across async tasks
     '''
     from feedly.tasks import fanout_love_feedly
     following_ids = self.get_follower_ids(user)
     following_groups = chunks(following_ids, self.FANOUT_CHUNK_SIZE)
     feeds = []
     for following_group in following_groups:
         # now, for these items pipeline/thread away via an async task
         fanout_love_feedly.delay(
             self, user, following_group, operation, *args, **kwargs)

     # reset the feeds to get out of the distributed mode
     # (note: feeds is never populated in this snippet, so this loop
     # is effectively a no-op here)
     connection = get_redis_connection()
     for feed in feeds:
         feed.redis = connection

     return feeds
Example #11
 def test_one_chunk(self):
     chunked = chunks(range(2), 5)
     chunked = list(chunked)
     self.assertEqual(chunked, [(0, 1)])
Example #12
 def test_chunks(self):
     chunked = chunks(range(6), 2)
     chunked = list(chunked)
     self.assertEqual(chunked, [(0, 1), (2, 3), (4, 5)])