Example #1
    def test_incr_and_curr_stats(self):
        """
        Check that ichnaea.content.tasks.{incr_stat,get_curr_stat}
        work on the most-recent-stat associated with a given day,
        and copy forward each day's stat to the next day, as they go.
        """
        from datetime import datetime, timedelta
        from ichnaea.content.tasks import incr_stat, get_curr_stat
        session = self.db_master_session
        today = datetime.utcnow().date()
        yesterday = (today - timedelta(1))
        two_days = (today - timedelta(2))

        for stat in ['deleted_cell', 'deleted_wifi']:
            p = 0
            for i in range(1, 10):
                p += i
                incr_stat(session, stat, i, two_days)
                self.assertEqual(get_curr_stat(session, stat, two_days), p)
                self.assertEqual(get_curr_stat(session, stat, yesterday), p)
                self.assertEqual(get_curr_stat(session, stat, today), p)
                self.assertEqual(get_curr_stat(session, stat), p)
            q = p
            for i in range(1, 10):
                q += i
                incr_stat(session, stat, i, yesterday)
                self.assertEqual(get_curr_stat(session, stat, two_days), p)
                self.assertEqual(get_curr_stat(session, stat, yesterday), q)
                self.assertEqual(get_curr_stat(session, stat, today), q)
                self.assertEqual(get_curr_stat(session, stat), q)
            r = q
            for i in range(1, 10):
                r += i
                incr_stat(session, stat, i, today)
                self.assertEqual(get_curr_stat(session, stat, two_days), p)
                self.assertEqual(get_curr_stat(session, stat, yesterday), q)
                self.assertEqual(get_curr_stat(session, stat, today), r)
                self.assertEqual(get_curr_stat(session, stat), r)
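
The loops above exercise the copy-forward behaviour: a value recorded for an
earlier day is carried into later days until a newer increment supersedes it.
Below is a minimal, hypothetical sketch of driving the same two helpers from a
cleanup routine; only the incr_stat/get_curr_stat signatures and the
'deleted_cell' stat name are taken from the test, the rest is assumed.

# Hypothetical usage sketch (not part of the source). The session is
# assumed to be an SQLAlchemy session equivalent to db_master_session.
from datetime import datetime

from ichnaea.content.tasks import incr_stat, get_curr_stat


def record_deleted_cells(session, n_deleted):
    today = datetime.utcnow().date()

    # Add today's deletions on top of whatever value was carried
    # forward from earlier days.
    incr_stat(session, 'deleted_cell', n_deleted, today)

    # Without an explicit day, get_curr_stat returns the most recent
    # value, which now reflects today's increment.
    return get_curr_stat(session, 'deleted_cell')
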
Example #2
from datetime import datetime, timedelta

from sqlalchemy import func


def trim_excessive_data(session, unique_model, measure_model,
                        join_measure, delstat, max_measures,
                        min_age_days, batch):
    """
    Delete measurements of type `measure_model` when, for any given
    key-field `kname`, there are more than `max_measures` measurements.
    Avoid deleting any measurements at all younger than `min_age_days`,
    and only delete measurements from at most `batch` keys per call.
    Increment the deleted-measurements stat named `delstat` and decrement
    the `total_measurements` field of the associated `unique_model`, as
    side effects.
    """
    from ichnaea.content.tasks import incr_stat

    # generally: only work with rows that are older than a
    # date threshold, so that we are definitely not interfering
    # with periodic recent-stat calculations on incoming new data
    utcnow = datetime.utcnow()
    age_threshold = utcnow - timedelta(days=min_age_days)
    age_cond = measure_model.created < age_threshold

    # initial (fast) query to pull out those uniques that have
    # total_measures larger than max_measures; will refine this
    # set of keys subsequently by date-window.
    query = session.query(unique_model).filter(
        unique_model.total_measures > max_measures).limit(batch)
    uniques = query.all()
    counts = []

    # secondarily, refine set of candidate keys by explicitly
    # counting measurements on each key, within the expiration
    # date-window.
    for u in uniques:

        query = session.query(func.count(measure_model.id)).filter(
            *join_measure(u)).filter(
            age_cond)

        c = query.first()
        assert c is not None
        n = int(c[0])
        if n > max_measures:
            counts.append((u, n))

    if len(counts) == 0:
        return 0

    # finally, for each definitely over-measured key, find a
    # cutoff row and trim measurements to it
    for (u, count) in counts:

        # determine the oldest measure (smallest (date,id) pair) to
        # keep for each key
        start = count - max_measures
        (smallest_date_to_keep, smallest_id_to_keep) = session.query(
            measure_model.time, measure_model.id).filter(
            *join_measure(u)).filter(
            age_cond).order_by(
            measure_model.time, measure_model.id).slice(start, count).first()

        # delete measures with (date,id) less than that, so long as they're
        # older than the date window.
        n = session.query(
            measure_model).filter(
            *join_measure(u)).filter(
            age_cond).filter(
            measure_model.time <= smallest_date_to_keep).filter(
            measure_model.id < smallest_id_to_keep).delete()

        # decrement model.total_measures; increment stats[delstat]
        assert u.total_measures >= 0
        u.total_measures -= n
        # if there are more unprocessed new measures than retained
        # measures, forget the excess and keep only as many as we still
        # have underlying measures for
        if u.new_measures > u.total_measures:
            u.new_measures = u.total_measures
        incr_stat(session, delstat, n)

    session.commit()
    return n
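
The function is generic over the model pair and the join condition. The sketch
below shows one hypothetical way it might be wired up for a wifi table: the
Wifi/WifiMeasure model names, their import path, the `key` column, and the
concrete limits are assumptions for illustration; only the trim_excessive_data
signature and the 'deleted_wifi' stat name appear in the examples here.

# Hypothetical wiring sketch (model names, import path and limits assumed).
from ichnaea.models import Wifi, WifiMeasure  # assumed import path


def wifi_join_measure(u):
    # join_measure must return an iterable of filter clauses selecting
    # the measure rows that belong to the unique row `u`.
    return (WifiMeasure.key == u.key,)


def trim_wifi(session):
    return trim_excessive_data(
        session,
        unique_model=Wifi,
        measure_model=WifiMeasure,
        join_measure=wifi_join_measure,
        delstat='deleted_wifi',
        max_measures=10000,
        min_age_days=7,
        batch=100,
    )
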
Example #3
from datetime import datetime, timedelta

from sqlalchemy import func


def trim_excessive_data(session, unique_model, measure_model, join_measure,
                        delstat, max_measures, min_age_days, batch):
    """
    Delete measurements of type `measure_model` when, for any given
    key-field `kname`, there are more than `max_measures` measurements.
    Avoid deleting any measurements at all younger than `min_age_days`,
    and only delete measurements from at most `batch` keys per call.
    Increment the deleted-measurements stat named `delstat` and decrement
    the `total_measurements` field of the associated `unique_model`, as
    side effects.
    """
    from ichnaea.content.tasks import incr_stat

    # generally: only work with rows that are older than a
    # date threshold, so that we are definitely not interfering
    # with periodic recent-stat calculations on incoming new data
    utcnow = datetime.utcnow()
    age_threshold = utcnow - timedelta(days=min_age_days)
    age_cond = measure_model.created < age_threshold

    # initial (fast) query to pull out those uniques that have
    # total_measures larger than max_measures; will refine this
    # set of keys subsequently by date-window.
    query = session.query(unique_model).filter(
        unique_model.total_measures > max_measures).limit(batch)
    uniques = query.all()
    counts = []

    # secondarily, refine set of candidate keys by explicitly
    # counting measurements on each key, within the expiration
    # date-window.
    for u in uniques:

        query = session.query(func.count(
            measure_model.id)).filter(*join_measure(u)).filter(age_cond)

        c = query.first()
        assert c is not None
        n = int(c[0])
        if n > max_measures:
            counts.append((u, n))

    if len(counts) == 0:
        return 0

    # finally, for each definitely over-measured key, find a
    # cutoff row and trim measurements to it
    for (u, count) in counts:

        # determine the oldest measure (smallest (date,id) pair) to
        # keep for each key
        start = count - max_measures
        (smallest_date_to_keep, smallest_id_to_keep) = session.query(
            measure_model.time, measure_model.id).filter(
                *join_measure(u)).filter(age_cond).order_by(
                    measure_model.time, measure_model.id).slice(start,
                                                                count).first()

        # delete measures with (date,id) less than that, so long as they're
        # older than the date window.
        n = session.query(measure_model).filter(
            *join_measure(u)).filter(age_cond).filter(
                measure_model.time <= smallest_date_to_keep).filter(
                    measure_model.id < smallest_id_to_keep).delete()

        # decrement model.total_measures; increment stats[delstat]
        assert u.total_measures >= 0
        u.total_measures -= n
        # if there are more unprocessed new measures than retained
        # measures, forget the excess and keep only as many as we still
        # have underlying measures for
        if u.new_measures > u.total_measures:
            u.new_measures = u.total_measures
        incr_stat(session, delstat, n)

    session.commit()
    return n
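
Both copies of the function pick the cutoff row with .slice(start, count).first(),
i.e. the row at index count - max_measures in ascending (time, id) order, and
then delete the rows below it. The plain-Python sketch below, with made-up
in-memory rows, illustrates that arithmetic; the SQL delete approximates the
strict (time, id) comparison with `time <= cutoff_time and id < cutoff_id`,
which is equivalent when ids grow with time.

# Standalone sketch of the cutoff arithmetic (illustrative data only).
from datetime import date


def split_rows(rows, max_measures):
    # rows: list of (time, id) tuples for a single key, possibly unordered
    ordered = sorted(rows)                # ascending (time, id)
    start = len(ordered) - max_measures   # same as count - max_measures above
    cutoff = ordered[start]               # smallest (time, id) pair to keep
    keep = [r for r in ordered if r >= cutoff]
    drop = [r for r in ordered if r < cutoff]
    return cutoff, keep, drop


rows = [(date(2014, 1, d), i) for i, d in enumerate([3, 1, 2, 1, 5], start=1)]
cutoff, keep, drop = split_rows(rows, max_measures=2)
assert len(keep) == 2 and len(drop) == 3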