def create_fanout_tasks(self, follower_ids, feed_class, operation, operation_kwargs=None, fanout_priority=None):
    '''
    Spawn the fanout tasks that apply ``operation`` to the feeds of the
    given followers.

    The follower ids are split into chunks of ``fanout_chunk_size`` and
    one async task is scheduled per chunk.

    :param follower_ids: specify the list of followers
    :param feed_class: the feed classes to run the operation on
    :param operation: the operation function applied to all follower feeds
    :param operation_kwargs: kwargs passed to the operation
    :param fanout_priority: the priority set to this fanout
    '''
    fanout_task = self.get_fanout_task(fanout_priority, feed_class=feed_class)
    if not fanout_task:
        # no task registered for this priority/feed combination
        return []

    chunk_size = self.fanout_chunk_size
    id_chunks = list(chunks(follower_ids, chunk_size))
    logger.info(
        'spawning %s subtasks for %s user ids in chunks of %s users',
        len(id_chunks), len(follower_ids), chunk_size)

    # schedule one async task per chunk of follower ids
    return [
        fanout_task.delay(
            feed_manager=self,
            feed_class=feed_class,
            user_ids=id_chunk,
            operation=operation,
            operation_kwargs=operation_kwargs,
        )
        for id_chunk in id_chunks
    ]
def batch_import(self, user_id, activities, fanout=True, chunk_size=500):
    '''
    Batch import all of the users activities and distributes
    them to the users followers

    **Example**::

        activities = [long list of activities]
        stream_framework.batch_import(13, activities, 500)

    :param user_id: the user who created the activities
    :param activities: a list of activities from this user
    :param fanout: if we should run the fanout or not
    :param chunk_size: per how many activities to run the batch operations
    '''
    activities = list(activities)
    if not activities:
        # nothing to import
        return

    logger.info('running batch import for user %s', user_id)
    user_feed = self.get_user_feed(user_id)
    if activities[0].actor_id != user_id:
        raise ValueError('Send activities for only one user please')

    activity_chunks = list(chunks(activities, chunk_size))
    logger.info('processing %s items in %s chunks of %s',
                len(activities), len(activity_chunks), chunk_size)

    for chunk_index, activity_chunk in enumerate(activity_chunks):
        # first insert into the global activity storage
        self.user_feed_class.insert_activities(activity_chunk, self.options)
        logger.info(
            'inserted chunk %s (length %s) into the global activity store',
            chunk_index, len(activity_chunk))

        # next add the activities to the users personal timeline
        user_feed.add_many(activity_chunk, trim=False)
        logger.info('inserted chunk %s (length %s) into the user feed',
                    chunk_index, len(activity_chunk))

        if not fanout:
            continue

        # now start a big fanout task for this chunk
        logger.info('starting task fanout for chunk %s', chunk_index)
        follower_ids_by_prio = self.get_user_follower_ids(user_id=user_id)
        operation_kwargs = dict(activities=activity_chunk, trim=False)
        for feed_class in self.feed_classes.values():
            for priority_group, fids in follower_ids_by_prio.items():
                self.create_fanout_tasks(
                    fids,
                    feed_class,
                    add_operation,
                    fanout_priority=priority_group,
                    operation_kwargs=operation_kwargs)
def _add_many(redis, score_value_pairs):
    '''
    Add many (score, value) pairs to the sorted set at ``key``.

    The pairs are flattened and written in chunks of 200 so one huge
    ZADD call is avoided.
    '''
    score_value_list = sum(map(list, score_value_pairs), [])
    score_value_chunks = chunks(score_value_list, 200)

    for score_value_chunk in score_value_chunks:
        # BUG FIX: the original iterated over ``[score_value_chunks]`` (a
        # one-element list wrapping the generator) and tried to unpack the
        # generator itself as (value, score). Pair up the flat chunk
        # (score, value, score, value, ...) instead and build the
        # {member: score} mapping that redis-py >= 3.0 expects.
        mapping = {value: score for score, value
                   in zip(score_value_chunk[::2], score_value_chunk[1::2])}
        result = redis.zadd(key, mapping)
        logger.debug('adding to %s with score_value_chunk %s',
                     key, score_value_chunk)
        results.append(result)
    return results
def _add_many(redis, score_value_pairs):
    '''
    Add many (score, value) pairs to the sorted set at ``key``,
    issuing one ZADD per pair.

    The pairs are flattened and processed in chunks of 200.
    '''
    score_value_list = sum(map(list, score_value_pairs), [])
    score_value_chunks = chunks(score_value_list, 200)

    for score_value_chunk in score_value_chunks:
        # BUG FIX: the original did ``for score, value in score_value_chunks``
        # which tries to unpack each 200-element chunk into two names and
        # raises ValueError. Walk the flat chunk pairwise instead; items
        # alternate score, value.
        for score, value in zip(score_value_chunk[::2], score_value_chunk[1::2]):
            result = redis.zadd(key, {value: score})
            logger.debug('adding to %s with value %s and score %s', key, value, score)
            results.append(result)
    return results
def _add_many(redis, score_value_pairs):
    '''
    Add many (score, value) pairs to the sorted set at ``key``.

    The pairs are flattened and written in chunks of 200 so one huge
    ZADD call is avoided.
    '''
    score_value_list = sum(map(list, score_value_pairs), [])
    score_value_chunks = chunks(score_value_list, 200)

    for score_value_chunk in score_value_chunks:
        # BUG FIX: ``redis.zadd(key, *chunk)`` used the positional API that
        # redis-py 3.0 removed; it now raises TypeError. Build the
        # {member: score} mapping instead (the flat chunk alternates
        # score, value).
        result = redis.zadd(
            key, dict(zip(score_value_chunk[1::2], score_value_chunk[::2])))
        logger.debug('adding to %s with score_value_chunk %s',
                     key, score_value_chunk)
        results.append(result)
    return results
def _add_many(redis, score_value_pairs):
    '''
    Add many (score, value) pairs to the sorted set at ``key``.

    The pairs are flattened and written in chunks of 200 so one huge
    ZADD call is avoided.
    '''
    # flatten [(score, value), ...] into [score, value, score, value, ...]
    flat_pairs = sum(map(list, score_value_pairs), [])
    for score_value_chunk in chunks(flat_pairs, 200):
        # build the {member: score} mapping redis-py expects; scores sit at
        # even indexes of the flat chunk, members at odd indexes
        mapping = {member: score for score, member
                   in zip(score_value_chunk[::2], score_value_chunk[1::2])}
        outcome = redis.zadd(key, mapping)
        logger.debug('adding to %s with score_value_chunk %s',
                     key, score_value_chunk)
        results.append(outcome)
    return results
def batch_import(self, user_id, activities, fanout=True, chunk_size=500):
    '''
    Batch import all of the users activities and distributes
    them to the users followers

    **Example**::

        activities = [long list of activities]
        stream_framework.batch_import(13, activities, 500)

    :param user_id: the user who created the activities
    :param activities: a list of activities from this user
    :param fanout: if we should run the fanout or not
    :param chunk_size: per how many activities to run the batch operations
    '''
    activities = list(activities)
    # skip empty lists
    if not activities:
        return

    logger.info('running batch import for user %s', user_id)
    user_feed = self.get_user_feed(user_id)
    if activities[0].actor_id != user_id:
        raise ValueError('Send activities for only one user please')

    batches = list(chunks(activities, chunk_size))
    logger.info('processing %s items in %s chunks of %s',
                len(activities), len(batches), chunk_size)

    for position, batch in enumerate(batches):
        # first insert into the global activity storage
        self.user_feed_class.insert_activities(batch)
        logger.info(
            'inserted chunk %s (length %s) into the global activity store',
            position, len(batch))

        # next add the activities to the users personal timeline
        user_feed.add_many(batch, trim=False)
        logger.info(
            'inserted chunk %s (length %s) into the user feed',
            position, len(batch))

        if not fanout:
            continue

        # now start a big fanout task for this batch
        logger.info('starting task fanout for chunk %s', position)
        follower_ids_by_prio = self.get_user_follower_ids(user_id=user_id)
        operation_kwargs = dict(activities=batch, trim=False)
        for feed_class in self.feed_classes.values():
            for priority_group, fids in follower_ids_by_prio.items():
                self.create_fanout_tasks(
                    fids,
                    feed_class,
                    add_operation,
                    fanout_priority=priority_group,
                    operation_kwargs=operation_kwargs,
                )
def _add_many(redis, score_value_pairs):
    '''
    Add many (score, value) pairs to the sorted set at ``key``.

    The pairs are flattened and written in chunks of 200 so one huge
    ZADD call is avoided.
    '''
    score_value_list = sum(map(list, score_value_pairs), [])
    score_value_chunks = chunks(score_value_list, 200)

    for score_value_chunk in score_value_chunks:
        # BUG FIX: the original comprehension iterated ``[score_value_chunk]``
        # (a one-element list) and tried to unpack the whole 200-element
        # chunk as (v, k), raising ValueError. Pair the flat chunk
        # (score, value, score, value, ...) properly instead.
        # redis-py >= 3.0 requires a {member: score} dictionary
        mapping = {member: score for score, member
                   in zip(score_value_chunk[::2], score_value_chunk[1::2])}
        result = redis.zadd(key, mapping)
        logger.debug('adding to %s with score_value_chunk %s',
                     key, score_value_chunk)
        results.append(result)
    return results
def _add_many(redis, score_value_pairs):
    '''
    Add many (score, value) pairs to the sorted set at ``key``.

    The pairs are flattened and written in chunks of 200 so one huge
    ZADD call is avoided.
    '''
    score_value_list = sum(map(list, score_value_pairs), [])
    score_value_chunks = chunks(score_value_list, 200)

    for score_value_chunk in score_value_chunks:
        # pair up consecutive items (the flat chunk alternates score, name)
        # and build the {name: score} mapping redis-py >= 3.0 expects;
        # removed a leftover commented-out pdb breakpoint from the loop
        pair_iter = iter(score_value_chunk)
        mapping = {name: score for score, name in zip(pair_iter, pair_iter)}
        result = redis.zadd(key, mapping=mapping)
        logger.debug('adding to %s with score_value_chunk %s',
                     key, score_value_chunk)
        results.append(result)
    return results
def create_fanout_tasks(self, follower_ids, feed_class, operation, operation_kwargs=None, fanout_priority=None):
    """
    Creates the fanout task for the given activities and feed classes
    followers

    It takes the follower ids and distributes them per fanout_chunk_size
    into smaller tasks
    """
    fanout_task = self.get_fanout_task(fanout_priority, feed_class=feed_class)
    if not fanout_task:
        # nothing registered for this priority/feed combination
        return []

    batches = list(chunks(follower_ids, self.fanout_chunk_size))
    log.info('feed_spawn_tasks',
             subtasks=len(batches),
             recipients=len(follower_ids))

    # one async task per batch of follower ids
    return [
        fanout_task.delay(feed_manager=self,
                          feed_class=feed_class,
                          user_ids=batch,
                          operation=operation,
                          operation_kwargs=operation_kwargs)
        for batch in batches
    ]
def test_chunks(self):
    # six items split pairwise should yield three tuples
    result = list(chunks(range(6), 2))
    self.assertEqual(result, [(0, 1), (2, 3), (4, 5)])
def test_one_chunk(self):
    # fewer items than the chunk size yields a single short chunk
    result = list(chunks(range(2), 5))
    self.assertEqual(result, [(0, 1)])