def classify(self, objekts=None, attribute_name=None):
    """ Run :meth:`classify_one` on each member of ``objekts``.

    :param objekts: iterable of the objects to classify. Defaults to
        ``self.iterables`` when ``None``.
    :param attribute_name: name of the attribute read on each object;
        its value is handed to :meth:`classify_one` together with the
        object itself. Defaults to ``self.attribute_name`` when ``None``.

    :raises AssertionError: if ``objekts`` or ``attribute_name`` is still
        ``None`` once the instance defaults have been applied.

    Returns a ``dict`` of the form::

        {
            u'duration': <elapsed wall-clock time in seconds, as float>,
            u'seen_objects': <number of objects processed>,
            u'error_types': {<classify_one() result>: <occurrences>},
            u'stored_instances': <value of self.stored_instances>,
        }
    """

    start_time = pytime.time()

    if objekts is None:
        objekts = self.iterables

    if attribute_name is None:
        attribute_name = self.attribute_name

    assert objekts is not None, \
        u'no objects to classify (argument and instance default are None)'
    assert attribute_name is not None, \
        u'no attribute name (argument and instance default are None)'

    seen_objects = 0
    error_types = {}

    for objekt in objekts:
        error = self.classify_one(getattr(objekt, attribute_name), objekt)

        # Tally how many times each classification result came back.
        error_types[error] = error_types.get(error, 0) + 1
        seen_objects += 1

    return {
        u'duration': pytime.time() - start_time,
        u'seen_objects': seen_objects,
        u'error_types': error_types,
        u'stored_instances': self.stored_instances,
    }
def feed_distribution_by_last_fetch():
    """ Compute and group feeds by ``last_fetch`` delta from now.

    Feeds are split into consecutive buckets, from the most recently
    fetched to the least recently fetched. Bucket limits are fractions
    and multiples of ``config.FEED_FETCH_DEFAULT_INTERVAL``, then 1 to 7
    days, then 10 days; the last bucket has no far limit.

    Returns a ``dict`` in which every integer key (starting at 0) maps to
    a list of the form::

        [feeds, count, percent, lower_value, upper_value, avg_fi]

    - ``feeds``: result of the ``Feed.objects(...)`` query of the bucket.
    - ``count``: number of feeds in the bucket.
    - ``percent``: ``count`` as a percentage of the open feeds count
      (``0.0`` when there is no open feed at all).
    - ``lower_value``: the bucket holds ``last_fetch <= lower_value``
      (``None`` for the first bucket).
    - ``upper_value``: the bucket holds ``last_fetch > upper_value``
      (``None`` for the last bucket).
    - ``avg_fi``: mean ``fetch_interval`` of the feeds of the bucket
      (``0.0`` for an empty bucket).

    The ``'meta'`` key maps to a ``dict`` holding ``fetched_feeds`` (sum
    of all bucket counts), ``open_feeds_count``, ``duration`` (seconds
    spent in this function) and ``loop_count`` (number of buckets).
    """

    start_time = pytime.time()

    open_feeds_count = Feed.objects.filter(closed__ne=True).count()

    default_interval = config.FEED_FETCH_DEFAULT_INTERVAL

    delta_lengths = (
        timedelta(seconds=default_interval / 6),
        timedelta(seconds=default_interval / 2),
        timedelta(seconds=default_interval),
        timedelta(seconds=default_interval * 2),
        timedelta(seconds=default_interval * 6),
        timedelta(seconds=default_interval * 12),
        timedelta(days=1),
        timedelta(days=2),
        timedelta(days=3),
        timedelta(days=4),
        timedelta(days=5),
        timedelta(days=6),
        timedelta(days=7),
        timedelta(days=10),
        # Sentinel: the last bucket takes everything older.
        None,
    )

    # Sample the clock only once: the queries below take time, and
    # re-reading it on every iteration would make the bucket limits drift
    # away from their nominal deltas.
    reference_time = now()

    results = {}
    fetched_feeds = 0
    lower_value = None

    for loop_count, delta in enumerate(delta_lengths):
        upper_value = (reference_time - delta) if delta else None

        if lower_value is None:
            kwargs = {'last_fetch__gt': upper_value}

        elif upper_value is None:
            kwargs = {'last_fetch__lte': lower_value}

        else:
            kwargs = {
                'last_fetch__lte': lower_value,
                'last_fetch__gt': upper_value,
            }

        # NOTE(review): these queries are not restricted to open feeds,
        # whereas `percent` is relative to the open feeds count. Confirm
        # this is intended.
        feeds = Feed.objects(**kwargs)
        count = feeds.count()

        # Without any open feed, the ratio is undefined: report 0.0
        # instead of raising ZeroDivisionError.
        if open_feeds_count:
            percent = count * 100.0 / open_feeds_count
        else:
            percent = 0.0

        # `count or 1.0` keeps the average at 0.0 for an empty bucket.
        avg_fi = sum(f.fetch_interval for f in feeds) * 1.0 / (count or 1.0)

        results[loop_count] = [
            feeds,
            count,
            percent,
            lower_value,
            upper_value,
            avg_fi,
        ]

        fetched_feeds += count
        lower_value = upper_value

    results['meta'] = {
        'fetched_feeds': fetched_feeds,
        'open_feeds_count': open_feeds_count,
        'duration': pytime.time() - start_time,
        'loop_count': len(delta_lengths),
    }

    return results
def feed_distribution_by_last_fetch():
    """ Compute and group feeds by ``last_fetch`` delta from now.

    Feeds are split into consecutive buckets, from the most recently
    fetched to the least recently fetched. Bucket limits are fractions
    and multiples of ``config.FEED_FETCH_DEFAULT_INTERVAL``, then 1 to 7
    days, then 10 days; the last bucket has no far limit.

    Returns a ``dict`` in which every integer key (starting at 0) maps to
    a list of the form::

        [feeds, count, percent, lower_value, upper_value, avg_fi]

    - ``feeds``: result of the ``Feed.objects(...)`` query of the bucket.
    - ``count``: number of feeds in the bucket.
    - ``percent``: ``count`` as a percentage of the open feeds count
      (``0.0`` when there is no open feed at all).
    - ``lower_value``: the bucket holds ``last_fetch <= lower_value``
      (``None`` for the first bucket).
    - ``upper_value``: the bucket holds ``last_fetch > upper_value``
      (``None`` for the last bucket).
    - ``avg_fi``: mean ``fetch_interval`` of the feeds of the bucket
      (``0.0`` for an empty bucket).

    The ``'meta'`` key maps to a ``dict`` holding ``fetched_feeds`` (sum
    of all bucket counts), ``open_feeds_count``, ``duration`` (seconds
    spent in this function) and ``loop_count`` (number of buckets).
    """

    start_time = pytime.time()

    open_feeds_count = Feed.objects.filter(closed__ne=True).count()

    default_interval = config.FEED_FETCH_DEFAULT_INTERVAL

    delta_lengths = (
        timedelta(seconds=default_interval / 6),
        timedelta(seconds=default_interval / 2),
        timedelta(seconds=default_interval),
        timedelta(seconds=default_interval * 2),
        timedelta(seconds=default_interval * 6),
        timedelta(seconds=default_interval * 12),
        timedelta(days=1),
        timedelta(days=2),
        timedelta(days=3),
        timedelta(days=4),
        timedelta(days=5),
        timedelta(days=6),
        timedelta(days=7),
        timedelta(days=10),
        # Sentinel: the last bucket takes everything older.
        None,
    )

    # Sample the clock only once: the queries below take time, and
    # re-reading it on every iteration would make the bucket limits drift
    # away from their nominal deltas.
    reference_time = now()

    results = {}
    fetched_feeds = 0
    lower_value = None

    for loop_count, delta in enumerate(delta_lengths):
        upper_value = (reference_time - delta) if delta else None

        if lower_value is None:
            kwargs = {'last_fetch__gt': upper_value}

        elif upper_value is None:
            kwargs = {'last_fetch__lte': lower_value}

        else:
            kwargs = {
                'last_fetch__lte': lower_value,
                'last_fetch__gt': upper_value,
            }

        # NOTE(review): these queries are not restricted to open feeds,
        # whereas `percent` is relative to the open feeds count. Confirm
        # this is intended.
        feeds = Feed.objects(**kwargs)
        count = feeds.count()

        # Without any open feed, the ratio is undefined: report 0.0
        # instead of raising ZeroDivisionError.
        if open_feeds_count:
            percent = count * 100.0 / open_feeds_count
        else:
            percent = 0.0

        # `count or 1.0` keeps the average at 0.0 for an empty bucket.
        avg_fi = sum(f.fetch_interval for f in feeds) * 1.0 / (count or 1.0)

        results[loop_count] = [
            feeds,
            count,
            percent,
            lower_value,
            upper_value,
            avg_fi,
        ]

        fetched_feeds += count
        lower_value = upper_value

    results['meta'] = {
        'fetched_feeds': fetched_feeds,
        'open_feeds_count': open_feeds_count,
        'duration': pytime.time() - start_time,
        'loop_count': len(delta_lengths),
    }

    return results