Example #1
def remove_operation(feed, activities, trim=True, batch_interface=None):
    '''
    Remove the activities from the feed.

    Functions used in tasks need to be defined at the top level of the module.
    '''
    t = timer()
    msg_format = 'running %s.remove_many operation for %s activities batch interface %s'
    logger.debug(msg_format, feed, len(activities), batch_interface)
    feed.remove_many(activities, trim=trim, batch_interface=batch_interface)
    logger.debug('remove many operation took %s seconds', t.next())
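These operations time themselves with a timer() helper that the examples never define. A minimal sketch of what the t.next() calls assume (the name and interface are inferred from usage, not confirmed by the source): a stateful object whose next() method returns the seconds elapsed since the previous call.

import time

class timer(object):
    # Sketch of the assumed helper: next() returns the seconds
    # elapsed since the previous next() call (or since construction).
    def __init__(self):
        self.last = time.time()

    def next(self):
        now = time.time()
        delta = now - self.last
        self.last = now
        return delta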
Example #2
def add_operation(feed, activities, trim=True, batch_interface=None):
    '''
    Add the activities to the feed.

    Functions used in tasks need to be defined at the top level of the module.
    '''
    t = timer()
    msg_format = 'running %s.add_many operation for %s activities batch interface %s and trim %s'
    logger.debug(msg_format, feed, len(activities), batch_interface, trim)
    feed.add_many(activities, batch_interface=batch_interface, trim=trim)
    logger.debug('add many operation took %s seconds', t.next())
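A hypothetical invocation of the pair, assuming feed is an initialized feed instance and activities is a list of activity objects:

# add the activities, letting the feed trim itself, then remove them again
add_operation(feed, activities, trim=True)
remove_operation(feed, activities)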
Example #3
    def add_many(self,
                 activities,
                 trim=True,
                 current_activities=None,
                 *args,
                 **kwargs):
        '''
        Adds many activities to the feed.

        Unfortunately we can't support the batch interface here:
        the writes depend on the reads, and subsequent writes depend
        on these writes, so no batching is possible at all.

        :param activities: the list of activities to add
        '''
        validate_list_of_strict(activities,
                                (self.activity_class, FakeActivity))
        # start by getting the aggregator
        aggregator = self.get_aggregator()

        t = timer()
        # get the current aggregated activities
        if current_activities is None:
            current_activities = self[:self.merge_max_length]
        msg_format = 'reading %s items took %s'
        logger.debug(msg_format, self.merge_max_length, t.next())

        # merge the current activities with the new ones
        new, changed, deleted = aggregator.merge(current_activities,
                                                 activities)
        logger.debug('merge took %s', t.next())

        # new ones we insert, changed we do a delete and insert
        new_aggregated = self._update_from_diff(new, changed, deleted)
        new_aggregated = aggregator.rank(new_aggregated)

        # trim every now and then
        if trim and random.random() <= self.trim_chance:
            self.timeline_storage.trim(self.key, self.max_length)

        return new_aggregated
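The trailing trim is probabilistic: with trim_chance set to, say, 0.01 (an assumed value for illustration; the real setting lives on the feed class), roughly one add_many call in a hundred pays the cost of trimming the timeline, amortizing it across the write path. A quick sanity check of that expectation:

import random

trim_chance = 0.01  # assumed value for illustration
# count how many of 10,000 simulated writes would trigger a trim
trims = sum(1 for _ in range(10000) if random.random() <= trim_chance)
print(trims)  # ~100 on average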
Example #4
    def test_aggregated_add_many(self):
        # setup the pins and activity chunk
        t = timer()
        admin_user_id = 1
        aggregated = manager.get_feeds(admin_user_id)['aggregated']
        pins = list(Pin.objects.filter(user=admin_user_id)[:3])
        activities = []
        base_activity = pins[0].create_activity()
        sample_size = 1000
        for x in range(1, sample_size):
            activity = copy.deepcopy(base_activity)
            activity.actor_id = x
            activity.object_id = x
            activities.append(activity)
        aggregated.insert_activities(activities)

        for activity in activities:
            aggregated.add_many([activity], trim=False)
        add_many_time = t.next()
        print 'add_many ran %s times, took %s' % (len(activities), add_many_time)
        popular_user_time = 100000. / sample_size * add_many_time
        print 'popular user fanout would take %s seconds' % popular_user_time
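The extrapolation at the end is linear: the fanout for a user with 100,000 followers is modeled as the measured per-sample cost scaled up. For example, assuming the sampled calls took 50 seconds in total (an illustrative number, not a measured result):

sample_size = 1000
add_many_time = 50.0  # illustrative measurement
popular_user_time = 100000. / sample_size * add_many_time
print(popular_user_time)  # 5000.0 seconds for a 100k-follower fanout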
Example #5
def run_benchmark(benchmark, network_size, max_network_size, multiplier, duration):
    logger.info('Starting the benchmark! Exciting.... :)')
    
    if benchmark is None:
        benchmark_class = get_benchmark('stream_bench_custom')
        benchmark = benchmark_class(network_size, max_network_size, multiplier, duration)
    else:
        benchmark_class = get_benchmark(benchmark)
        benchmark = benchmark_class()
    
    logger.info('Running benchmark %s', benchmark.name)
    logger.info('Network size starting at %s will grow to %s', benchmark.network_size, benchmark.max_network_size)
    logger.info('Multiplier is set to %s and duration %s', benchmark.multiplier, benchmark.duration)
    metrics_instance = get_metrics_instance()

    social_model = benchmark.get_social_model()
    days = 0
    while True:
        logger.info(
            'Simulating a social network with network size %s', social_model.network_size)
        object_id = 1
        for x in range(benchmark.duration):
            days += 1
            social_model.day = days
            daily_tasks = collections.defaultdict(list)
            t = timer()
            metrics_instance.on_day_change(days)
            logger.debug('Day %s for our network', days)
            # create load based on the current model
            active_users = social_model.active_users
            for user_id in active_users:
                # follow other users, note that we don't actually store the follower
                #  lists for this benchmark
                for target_user_id in social_model.get_new_follows(user_id):
                    daily_tasks['follow_users'].append([user_id, target_user_id])
                    
                # create activities
                for x in range(social_model.get_user_activity(user_id)):
                    activity = create_activity(user_id, object_id)
                    object_id += 1
                    daily_tasks['add_activities'].append([user_id, activity])
                # read a few pages of data
                daily_tasks['read_feed_pages'].append([user_id])
                
            logger.debug('%s seconds spent creating the model', t.next())
            # send the daily tasks to celery
            batch_tasks = []
            for task_name, task_args in daily_tasks.items():
                task = getattr(tasks, task_name)
                for task_arg_chunk in chunks(task_args, 100):
                    task_signature = task.s(social_model, task_arg_chunk)
                    batch_tasks.append(task_signature)
            for signature in batch_tasks:
                signature.apply_async()
            logger.debug('%s seconds spent sending %s tasks', t.next(), len(batch_tasks))
            # optionally, wait here until the day's tasks have finished
            # before starting the next simulated day
            
            logger.debug('Day %s finished', days)
            time.sleep(1)

        # grow the network
        logger.info('Growing the social network.....')
        social_model.network_size = social_model.network_size * benchmark.multiplier
        metrics_instance.on_network_size_change(social_model.network_size)
        if social_model.network_size >= benchmark.max_network_size:
            logger.info(
                'Reached the max users, we\'re done with our benchmark!')
            break
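run_benchmark also leans on a chunks() helper that isn't shown. A minimal sketch of the behavior the task-batching loop assumes: split a list into successive pieces of at most n items.

def chunks(items, n):
    # Sketch of the assumed helper: yield successive slices
    # of at most n items from the input list.
    for i in range(0, len(items), n):
        yield items[i:i + n]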
Example #6
def benchmark_aggregated_feed():
    t = timer()
    manager.feed_classes = {"aggregated": AggregatedFeed}
    manager.add_entry(1, 1)
    print "Benchmarking aggregated feed took: %0.2fs" % t.next()
Example #7
def benchmark_flat_feed():
    t = timer()
    manager.feed_classes = {"flat": FashiolistaFeed}
    manager.add_entry(1, 1)
    print "Benchmarking flat feed took: %0.2fs" % t.next()
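The two micro-benchmarks differ only in which feed class the manager fans out to, so running them back to back gives a rough flat-versus-aggregated write-cost comparison. A hypothetical driver, assuming the manager and both feed classes are already configured:

if __name__ == '__main__':
    benchmark_flat_feed()
    benchmark_aggregated_feed()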