Exemplo n.º 1
0
    def test_aggregated_add_many(self):
        # setup the pins and activity chunk
        t = timer()
        admin_user_id = 1
        aggregated = feedly.get_feeds(admin_user_id)['aggregated']
        pins = list(Pin.objects.filter(user=admin_user_id)[:3])
        activities = []
        base_activity = pins[0].create_activity()
        sample_size = 1000
        for x in range(1, sample_size):
            activity = copy.deepcopy(base_activity)
            activity.actor_id = x
            activity.object_id = x
            activities.append(activity)

        print 'running on %s' % settings.FEEDLY_CASSANDRA_HOSTS
        print 'inserting the many'
        aggregated.insert_activities(activities)
        print 'done, took %s' % t.next()

        for activity in activities:
            aggregated.add_many([activity], trim=False)
        add_many_time = t.next()
        print 'add many ran 10000 times, took %s' % add_many_time
        popular_user_time = 100000. / sample_size * add_many_time
        print 'popular user fanout would take %s seconds' % popular_user_time
Exemplo n.º 2
0
    def test_aggregated_add_many(self):
        # setup the pins and activity chunk
        t = timer()
        admin_user_id = 1
        aggregated = feedly.get_feeds(admin_user_id)['aggregated']
        pins = list(Pin.objects.filter(user=admin_user_id)[:3])
        activities = []
        base_activity = pins[0].create_activity()
        sample_size = 1000
        for x in range(1, sample_size):
            activity = copy.deepcopy(base_activity)
            activity.actor_id = x
            activity.object_id = x
            activities.append(activity)

        print 'running on %s' % settings.FEEDLY_CASSANDRA_HOSTS
        print 'inserting the many'
        aggregated.insert_activities(activities)
        print 'done, took %s' % t.next()

        for activity in activities:
            aggregated.add_many([activity], trim=False)
        add_many_time = t.next()
        print 'add many ran 10000 times, took %s' % add_many_time
        popular_user_time = 100000. / sample_size * add_many_time
        print 'popular user fanout would take %s seconds' % popular_user_time
Exemplo n.º 3
0
def remove_operation(feed, activities, trim=True, batch_interface=None):
    '''
    Delete the given activities from the feed, timing the call.

    Kept at module level because functions dispatched as tasks must be
    importable by name.
    '''
    stopwatch = timer()
    logger.debug(
        'running %s.remove_many operation for %s activities batch interface %s',
        feed, len(activities), batch_interface)
    feed.remove_many(activities, trim=trim, batch_interface=batch_interface)
    logger.debug('remove many operation took %s seconds', stopwatch.next())
Exemplo n.º 4
0
def add_operation(feed, activities, trim=True, batch_interface=None):
    '''
    Write the given activities to the feed, timing the call.

    Kept at module level because functions dispatched as tasks must be
    importable by name.
    '''
    stopwatch = timer()
    logger.debug(
        'running %s.add_many operation for %s activities batch interface %s and trim %s',
        feed, len(activities), batch_interface, trim)
    feed.add_many(activities, batch_interface=batch_interface, trim=trim)
    logger.debug('add many operation took %s seconds', stopwatch.next())
Exemplo n.º 5
0
    def add_many(self,
                 activities,
                 trim=True,
                 current_activities=None,
                 *args,
                 **kwargs):
        '''
        Add a batch of activities to this aggregated feed.

        Batching at the storage layer is impossible here: the merge step
        reads the feed before writing, and each write feeds into the next,
        so every call is read-then-write.

        :param activities: the list of activities
        '''
        validate_list_of_strict(
            activities, (self.activity_class, FakeActivity))
        # the aggregator decides how activities group together
        aggregator = self.get_aggregator()

        stopwatch = timer()
        # read the newest aggregated rows to merge against, unless the
        # caller already supplied them
        if current_activities is None:
            current_activities = self[:self.merge_max_length]
        logger.debug('reading %s items took %s',
                     self.merge_max_length, stopwatch.next())

        # diff the incoming activities against the current aggregations
        new, changed, deleted = aggregator.merge(
            current_activities, activities)
        logger.debug('merge took %s', stopwatch.next())

        # insert the new rows; changed rows are deleted and re-inserted
        new_aggregated = aggregator.rank(
            self._update_from_diff(new, changed, deleted))

        # occasionally cap the feed at its maximum length
        if trim and random.random() <= self.trim_chance:
            self.timeline_storage.trim(self.key, self.max_length)

        return new_aggregated
Exemplo n.º 6
0
    def test_large_remove_activity(self):
        '''Time removing a 500-item slice from a feed of ~3600 entries.'''
        # grow the feed limit so nothing gets trimmed away
        self.test_feed.max_length = 3600
        verbs = [LoveVerb, AddVerb]
        activities = []
        for i in range(1, 3600):
            # alternate verbs and push each activity one day further back
            when = datetime.datetime.now() - datetime.timedelta(days=i)
            activities.append(FakeActivity(i, verbs[i % 2], i, i, when))
        self.test_feed.insert_activities(activities)
        self.test_feed.add_many(activities)

        to_remove = activities[200:700]
        remove_count = len(to_remove)
        feed_count = self.test_feed.count()
        stopwatch = timer()
        self.test_feed.remove_many(to_remove)
        print ('removing %s items from a feed of %s took %s seconds'
               % (remove_count, feed_count, stopwatch.next()))
Exemplo n.º 7
0
    def test_large_remove_activity(self):
        # first built a large feed
        self.test_feed.max_length = 3600
        activities = []
        choices = [LoveVerb, AddVerb]
        for i in range(1, 3600):
            verb = choices[i % 2]
            activity = FakeActivity(
                i, verb, i, i,
                datetime.datetime.now() - datetime.timedelta(days=i))
            activities.append(activity)
        self.test_feed.insert_activities(activities)
        self.test_feed.add_many(activities)

        to_remove = activities[200:700]
        remove_count = len(to_remove)
        feed_count = self.test_feed.count()
        t = timer()
        self.test_feed.remove_many(to_remove)
        msg_format = 'removing %s items from a feed of %s took %s seconds'
        print msg_format % (remove_count, feed_count, t.next())
Exemplo n.º 8
0
    def add_many(self, activities, trim=True, current_activities=None, *args, **kwargs):
        '''
        Adds many activities to the feed

        Unfortunately we can't support the batch interface.
        The writes depend on the reads.

        Also subsequent writes will depend on these writes.
        So no batching is possible at all.

        :param activities: the list of activities
        :param trim: when True, occasionally trim the feed to max_length
        :param current_activities: optional pre-read aggregated rows to
            merge against, skipping the read
        :raises ValueError: if any element is not an Activity
        '''
        # bug fix: the old check only inspected activities[0], so a mixed
        # list with a valid first element slipped through validation
        for activity in activities:
            if not isinstance(activity, Activity):
                raise ValueError('Expecting Activity not %s' % activities)
        # start by getting the aggregator
        aggregator = self.get_aggregator()

        t = timer()
        # get the current aggregated activities
        if current_activities is None:
            current_activities = self[:self.merge_max_length]
        msg_format = 'reading %s items took %s'
        logger.info(msg_format, self.merge_max_length, t.next())

        # merge the current activities with the new ones
        new, changed, deleted = aggregator.merge(
            current_activities, activities)
        logger.info('merge took %s', t.next())

        # new ones we insert, changed we do a delete and insert
        new_aggregated = self._update_from_diff(new, changed, deleted)
        new_aggregated = aggregator.rank(new_aggregated)

        # trim every now and then
        if trim and random.random() <= self.trim_chance:
            self.timeline_storage.trim(self.key, self.max_length)

        return new_aggregated
Exemplo n.º 9
0
def benchmark_flat_feed():
    t = timer()
    manager.feed_classes = {'flat': FashiolistaFeed}
    manager.add_entry(1, 1)
    print "Benchmarking flat feed took: %0.2fs" % t.next()
Exemplo n.º 10
0
def benchmark_aggregated_feed():
    t = timer()
    manager.feed_classes = {'aggregated': AggregatedFeed}
    manager.add_entry(1, 1)
    print "Benchmarking aggregated feed took: %0.2fs" % t.next()
Exemplo n.º 11
0
def benchmark_flat_feed():
    t = timer()
    manager.feed_classes = {'flat': FashiolistaFeed}
    manager.add_entry(1, 1)
    print "Benchmarking flat feed took: %0.2fs" % t.next()
Exemplo n.º 12
0
def benchmark_aggregated_feed():
    t = timer()
    manager.feed_classes = {'aggregated': AggregatedFeed}
    manager.add_entry(1, 1)
    print "Benchmarking aggregated feed took: %0.2fs" % t.next()