Пример #1
0
    def classify(self, objekts=None, attribute_name=None):
        """ Run :meth:`classify_one` on each member of ``objekts``.

        :param objekts: iterable of the objects to classify. When ``None``,
            ``self.iterables`` is used instead.
        :param attribute_name: name of the attribute read on every object;
            its value is handed to :meth:`classify_one` together with the
            object itself. When ``None``, ``self.attribute_name`` is used
            instead.

        Returns a ``dict`` of the form::

            {
                u'duration': <elapsed time, measured with pytime.time()>,
                u'seen_objects': <number of objects processed>,
                u'error_types': {<classify_one() result>: <occurrences>},
                u'stored_instances': <value of self.stored_instances>,
            }

        An ``AssertionError`` is raised when ``objekts`` or
        ``attribute_name`` is still ``None`` after the fallback.
        """

        started_at = pytime.time()

        # Anything not passed explicitly falls back to the instance value.
        if objekts is None:
            objekts = self.iterables

        if attribute_name is None:
            attribute_name = self.attribute_name

        assert objekts is not None
        assert attribute_name is not None

        outcome_counts = {}
        processed = 0

        # Start enumerating at 1, so that ``processed`` always holds
        # the number of objects handled so far.
        for processed, objekt in enumerate(objekts, 1):
            value = getattr(objekt, attribute_name)
            outcome = self.classify_one(value, objekt)
            outcome_counts[outcome] = outcome_counts.get(outcome, 0) + 1

        report = {u'duration': pytime.time() - started_at}
        report[u'seen_objects'] = processed
        report[u'error_types'] = outcome_counts
        report[u'stored_instances'] = self.stored_instances

        return report
Пример #2
0
    def classify(self, objekts=None, attribute_name=None):
        """ Classify every member of ``objekts`` via :meth:`classify_one`.

        For each object, the attribute called ``attribute_name`` is read and
        passed to :meth:`classify_one` along with the object; the returned
        values are tallied.

        Arguments left to ``None`` default to ``self.iterables`` and
        ``self.attribute_name`` respectively. If one of them is still
        ``None`` afterwards, an ``AssertionError`` is raised.

        Returns a ``dict`` of the form::

            {
                u'duration': <elapsed time, measured with pytime.time()>,
                u'seen_objects': <number of objects processed>,
                u'error_types': {<classify_one() result>: <occurrences>},
                u'stored_instances': <value of self.stored_instances>,
            }
        """

        clock_start = pytime.time()

        objekts = self.iterables if objekts is None else objekts
        attribute_name = (self.attribute_name if attribute_name is None
                          else attribute_name)

        assert objekts is not None
        assert attribute_name is not None

        counters = {}
        total = 0

        for objekt in objekts:
            verdict = self.classify_one(
                getattr(objekt, attribute_name), objekt)

            # First occurrence of a verdict opens its counter at 1.
            try:
                counters[verdict] += 1
            except KeyError:
                counters[verdict] = 1

            total += 1

        elapsed = pytime.time() - clock_start

        return {
            u'duration': elapsed,
            u'seen_objects': total,
            u'error_types': counters,
            u'stored_instances': self.stored_instances,
        }
Пример #3
0
def feed_distribution_by_last_fetch():
    """ Compute and group feeds by ``last_fetch`` delta from now.

    Feeds are split into consecutive ``last_fetch`` buckets, from the most
    recently fetched to the least recently fetched. Each bucket holds the
    feeds whose ``last_fetch`` is ``<= lower_value`` and ``> upper_value``;
    the first bucket has no lower bound and the last one no upper bound.

    Returns a ``dict`` whose integer keys (``0`` to ``loop_count - 1``) each
    map to a list ``[feeds, count, percent, lower_value, upper_value,
    avg_fi]`` where:

    - ``feeds`` is the result of ``Feed.objects(...)`` for the bucket,
    - ``count`` is ``feeds.count()``,
    - ``percent`` is ``count`` expressed as a percentage of the open feeds
      count (``0.0`` when there is no open feed at all),
    - ``lower_value`` / ``upper_value`` are the bucket bounds (``None``
      when the bucket is open-ended on that side),
    - ``avg_fi`` is the mean ``fetch_interval`` of the bucket's feeds
      (``0.0`` for an empty bucket).

    The additional ``'meta'`` key maps to a ``dict`` holding
    ``fetched_feeds`` (sum of all bucket counts), ``open_feeds_count``,
    ``duration`` (elapsed time measured with ``pytime.time()``) and
    ``loop_count`` (number of buckets).

    NOTE(review): buckets are queried without the ``closed`` filter used
    for ``open_feeds_count``, so percentages are not guaranteed to add up
    to 100 -- confirm this is intended.
    """

    start_time = pytime.time()

    # open_feeds = Feed.objects(Q(closed=False) | Q(closed__exists=False))
    open_feeds_count = Feed.objects.filter(closed__ne=True).count()

    interval      = config.FEED_FETCH_DEFAULT_INTERVAL
    lower_value   = None
    fetched_feeds = 0
    delta_lengths = (
        timedelta(seconds=interval / 6),
        timedelta(seconds=interval / 2),
        timedelta(seconds=interval),
        timedelta(seconds=interval * 2),
        timedelta(seconds=interval * 6),
        timedelta(seconds=interval * 12),
        timedelta(days=1),
        timedelta(days=2),
        timedelta(days=3),
        timedelta(days=4),
        timedelta(days=5),
        timedelta(days=6),
        timedelta(days=7),
        timedelta(days=10),
        # Sentinel: the last bucket has no upper bound.
        None,
    )

    results = {}

    for loop_count, delta in enumerate(delta_lengths):

        # Compare against None explicitly: a zero-length timedelta is falsy
        # and must not be mistaken for the open-ended sentinel.
        upper_value = None if delta is None else now() - delta

        kwargs = {}

        if lower_value is not None:
            kwargs['last_fetch__lte'] = lower_value

        if upper_value is not None:
            kwargs['last_fetch__gt'] = upper_value

        feeds  = Feed.objects(**kwargs)
        count  = feeds.count()
        avg_fi = sum(f.fetch_interval for f in feeds) * 1.0 / (count or 1.0)

        # Without any open feed there is nothing to relate the count to;
        # report 0.0 instead of failing with ZeroDivisionError.
        if open_feeds_count:
            percent = float(count * 100.0 / open_feeds_count)

        else:
            percent = 0.0

        results[loop_count] = [
            feeds,
            count,
            percent,
            lower_value,
            upper_value,
            avg_fi,
        ]

        fetched_feeds += count

        # The next bucket starts where this one ends.
        lower_value = upper_value

    results['meta'] = {'fetched_feeds': fetched_feeds,
                       'open_feeds_count': open_feeds_count,
                       'duration': pytime.time() - start_time,
                       'loop_count': len(delta_lengths)}

    return results
Пример #4
0
def feed_distribution_by_last_fetch():
    """ Compute and group feeds by ``last_fetch`` delta from now.

    Feeds are split into consecutive ``last_fetch`` buckets, from the most
    recently fetched to the least recently fetched. Each bucket holds the
    feeds whose ``last_fetch`` is ``<= lower_value`` and ``> upper_value``;
    the first bucket has no lower bound and the last one no upper bound.

    Returns a ``dict`` whose integer keys (``0`` to ``loop_count - 1``) each
    map to a list ``[feeds, count, percent, lower_value, upper_value,
    avg_fi]`` where:

    - ``feeds`` is the result of ``Feed.objects(...)`` for the bucket,
    - ``count`` is ``feeds.count()``,
    - ``percent`` is ``count`` expressed as a percentage of the open feeds
      count (``0.0`` when there is no open feed at all),
    - ``lower_value`` / ``upper_value`` are the bucket bounds (``None``
      when the bucket is open-ended on that side),
    - ``avg_fi`` is the mean ``fetch_interval`` of the bucket's feeds
      (``0.0`` for an empty bucket).

    The additional ``'meta'`` key maps to a ``dict`` holding
    ``fetched_feeds`` (sum of all bucket counts), ``open_feeds_count``,
    ``duration`` (elapsed time measured with ``pytime.time()``) and
    ``loop_count`` (number of buckets).

    NOTE(review): buckets are queried without the ``closed`` filter used
    for ``open_feeds_count``, so percentages are not guaranteed to add up
    to 100 -- confirm this is intended.
    """

    start_time = pytime.time()

    # open_feeds = Feed.objects(Q(closed=False) | Q(closed__exists=False))
    open_feeds_count = Feed.objects.filter(closed__ne=True).count()

    interval = config.FEED_FETCH_DEFAULT_INTERVAL
    lower_value = None
    fetched_feeds = 0
    delta_lengths = (
        timedelta(seconds=interval / 6),
        timedelta(seconds=interval / 2),
        timedelta(seconds=interval),
        timedelta(seconds=interval * 2),
        timedelta(seconds=interval * 6),
        timedelta(seconds=interval * 12),
        timedelta(days=1),
        timedelta(days=2),
        timedelta(days=3),
        timedelta(days=4),
        timedelta(days=5),
        timedelta(days=6),
        timedelta(days=7),
        timedelta(days=10),
        # Sentinel: the last bucket has no upper bound.
        None,
    )

    results = {}

    for loop_count, delta in enumerate(delta_lengths):

        # Compare against None explicitly: a zero-length timedelta is falsy
        # and must not be mistaken for the open-ended sentinel.
        upper_value = None if delta is None else now() - delta

        kwargs = {}

        if lower_value is not None:
            kwargs['last_fetch__lte'] = lower_value

        if upper_value is not None:
            kwargs['last_fetch__gt'] = upper_value

        feeds = Feed.objects(**kwargs)
        count = feeds.count()
        avg_fi = sum(f.fetch_interval for f in feeds) * 1.0 / (count or 1.0)

        # Without any open feed there is nothing to relate the count to;
        # report 0.0 instead of failing with ZeroDivisionError.
        if open_feeds_count:
            percent = float(count * 100.0 / open_feeds_count)

        else:
            percent = 0.0

        results[loop_count] = [
            feeds,
            count,
            percent,
            lower_value,
            upper_value,
            avg_fi,
        ]

        fetched_feeds += count

        # The next bucket starts where this one ends.
        lower_value = upper_value

    results['meta'] = {
        'fetched_feeds': fetched_feeds,
        'open_feeds_count': open_feeds_count,
        'duration': pytime.time() - start_time,
        'loop_count': len(delta_lengths)
    }

    return results