Example #1
def last_checked(row, log: ResultLog):
    """Data was checked within a reasonable timeframe"""

    target_date = row.targetDateEt.to_pydatetime()
    updated_at = row.lastUpdateEt.to_pydatetime()
    checked_at = row.lastCheckEt.to_pydatetime()

    if checked_at <= START_OF_TIME:
        # no check has been recorded yet; severity depends on the workflow phase
        phase = row.phase
        if phase == "inactive":
            pass
        elif phase in ["publish", "update"]:
            log.error(row.state, "check needed")
        elif phase in ["prepare", "cleanup"]:
            log.info(row.state, "check needed")
        return

    # the data was updated after it was last checked (by more than 2 hours)
    delta = updated_at - checked_at
    hours = delta.total_seconds() / (60.0 * 60)
    if hours > 2.0:
        s_updated = updated_at.strftime('%m/%d %H:%M')
        s_checked = checked_at.strftime('%m/%d %H:%M')
        log.error(row.state, f"updated since last check: {hours:.0f} hours ago at {s_updated}, checked at {s_checked}")
        return

    # the check itself is stale relative to the target date
    delta = target_date - checked_at
    hours = delta.total_seconds() / (60.0 * 60)
    if hours > 12.0:
        s_checked = checked_at.strftime('%m/%d %H:%M')
        log.warning(row.state, f"source has not been checked in {hours:.0f} hours at {s_checked}")
        return
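
Below is a minimal sketch of how last_checked might be driven. The print-only ResultLog stand-in, the START_OF_TIME sentinel value, and the namedtuple row layout are assumptions for illustration, not the project's actual API.

from collections import namedtuple
import pandas as pd

class ResultLog:
    """Hypothetical print-only stand-in for the ResultLog used above."""
    def error(self, state, msg): print(f"ERROR   {state}: {msg}")
    def warning(self, state, msg): print(f"WARNING {state}: {msg}")
    def info(self, state, msg): print(f"INFO    {state}: {msg}")

START_OF_TIME = pd.Timestamp("2020-01-01").to_pydatetime()  # assumed sentinel

Row = namedtuple("Row", "state phase targetDateEt lastUpdateEt lastCheckEt")
row = Row("NY", "publish",
          pd.Timestamp("2020-04-01 16:00"),   # targetDateEt
          pd.Timestamp("2020-04-01 15:00"),   # lastUpdateEt
          pd.Timestamp("2020-04-01 12:00"))   # lastCheckEt

log = ResultLog()
last_checked(row, log)  # data updated 3 hours after the last check -> error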
Example #2
def pendings_rate(row, log: ResultLog):
    """Check that pendings are not more than 20% of total"""

    n_pos, n_neg, n_pending = row.positive, row.negative, row.pending
    n_tot = n_pos + n_neg
    percent_pending = 100.0 * n_pending / n_tot if n_tot > 0 else 0.0

    # use a looser threshold when the total volume is small
    threshold = 20.0 if n_tot > 1000 else 80.0
    if percent_pending > threshold:
        log.warning(row.state, f"too many pending {percent_pending:.0f}% (pending={n_pending:,}, total={n_tot:,})")
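
A quick illustration of the two thresholds. The Row layout is an assumption, and the ResultLog stub and log from the sketch after Example #1 are reused.

from collections import namedtuple

Row = namedtuple("Row", "state positive negative pending")

pendings_rate(Row("WA", 6_000, 4_000, 2_500), log)  # 25% of 10,000 -> warning
pendings_rate(Row("WY",    50,    50,    60), log)  # 60% of a small total -> no warning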
Example #3
def increasing_values(row, df: pd.DataFrame, log: ResultLog):
    """Check that new values more than previous values

    df contains the historical values (newest first).  offset controls how many days to look back.
    """

    # restrict the history to dates strictly before the date being checked
    df = df[df.date < row.targetDate]

    dict_row = row._asdict()

    for c in ["positive", "negative", "death", "total"]:
        val = dict_row[c]
        vec = df[c].values
        prev_val = vec[0] if vec.size > 0 else 0  # most recent historical value

        if val < prev_val:
            log.error(row.state, f"{c} value ({val:,}) is less than prior value ({prev_val:,})")

        # allow value to be the same if below a threshold
        if val < IGNORE_THRESHOLDS[c]: continue

        phase = row.phase
        checked_at = row.lastCheckEt.to_pydatetime()
        is_check_field_set = checked_at > START_OF_TIME

        if val == prev_val:
            n_days, d = days_since_change(val, df[c], df["date"])
            if n_days >= 0:
                # format the YYYYMMDD date of the last change as MM/DD
                d = str(d)
                d = d[4:6] + "/" + d[6:8]

                if prev_val >= 20 and (is_check_field_set or phase in ["publish", "update"]):
                    log.error(row.state, f"{c} value ({val:,}) has not changed since {d} ({n_days} days)")
                else:
                    log.warning(row.state, f"{c} value ({val:,}) has not changed since {d} ({n_days} days)")
            else:
                log.error(row.state, f"{c} value ({val:,}) constant for all time")
            continue

        # observed day-over-day percentage increase
        p_observed = 100.0 * val / prev_val - 100.0

        #TODO: estimate expected increase from recent history
        p_min, p_max = EXPECTED_PERCENT_THRESHOLDS[c]
        if p_observed < p_min or p_observed > p_max:
            log.warning(row.state, f"{c} value ({val:,}) is a {p_observed:.0f}% increase, expected: {p_min:.0f} to {p_max:.0f}%")
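
A minimal invocation sketch. The IGNORE_THRESHOLDS and EXPECTED_PERCENT_THRESHOLDS values and the row/history layout are illustrative assumptions, not the project's real configuration; the ResultLog stub, log, and START_OF_TIME from the sketch after Example #1 are reused. Because every value increases here, days_since_change is never reached.

from collections import namedtuple
import pandas as pd

# illustrative placeholder thresholds
IGNORE_THRESHOLDS = {"positive": 100, "negative": 100, "death": 20, "total": 100}
EXPECTED_PERCENT_THRESHOLDS = {"positive": (1.0, 50.0), "negative": (1.0, 50.0),
                               "death": (0.0, 30.0), "total": (1.0, 50.0)}

history = pd.DataFrame({
    "date":     [20200402, 20200401, 20200331],  # newest first, as YYYYMMDD
    "positive": [1_000, 900, 800],
    "negative": [9_000, 8_500, 8_000],
    "death":    [40, 38, 35],
    "total":    [10_000, 9_400, 8_800],
})

Row = namedtuple("Row", "state targetDate phase lastCheckEt positive negative death total")
row = Row("NJ", 20200403, "publish", pd.Timestamp("2020-04-03 12:00"),
          1_900, 9_500, 44, 11_400)  # positive nearly doubled day-over-day

increasing_values(row, history, log)  # -> warning: positive is a 90% increase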