def last_checked(row, log: ResultLog):
    """Data was checked within a reasonable timeframe.

    Emits one finding at most:
      * error/info if the row has never been checked (severity by phase)
      * error if the data was updated more than 2h after the last check
      * warning if the data is more than 12h behind the target date
    """

    target_date = row.targetDateEt.to_pydatetime()
    updated_at = row.lastUpdateEt.to_pydatetime()
    checked_at = row.lastCheckEt.to_pydatetime()

    # Never checked (sentinel timestamp): severity depends on workflow phase.
    if checked_at <= START_OF_TIME:
        phase = row.phase
        if phase == "inactive":
            pass  # inactive rows are exempt from the check requirement
        elif phase in ["publish", "update"]:
            # IDIOM FIX: dropped the pointless f-prefix on literals with no
            # placeholders (runtime string is byte-identical).
            log.error(row.state, "check needed")
        elif phase in ["prepare", "cleanup"]:
            log.info(row.state, "check needed")
        return

    # Data changed after the last check -> the check is stale.
    delta = updated_at - checked_at
    hours = delta.total_seconds() / (60.0 * 60)
    if hours > 2.0:
        s_updated = updated_at.strftime('%m/%d %H:%M')
        s_checked = checked_at.strftime('%m/%d %H:%M')
        log.error(row.state, f"updated since last check: {hours:.0f} hours ago at {s_updated}, checked at {s_checked}")
        return

    # NOTE(review): this measures target_date - updated_at, but the message
    # says "has not been checked" — possibly should be target_date - checked_at.
    # Behavior preserved as-is; confirm intent with the author.
    delta = target_date - updated_at
    hours = delta.total_seconds() / (60.0 * 60)
    if hours > 12.0:
        s_checked = checked_at.strftime('%m/%d %H:%M')
        log.warning(row.state, f"source has not been checked in {hours:.0f} hours at {s_checked}")
        return
def pendings_rate(row, log: ResultLog):
    """Check that pendings are not more than 20% of total.

    For small samples (total <= 1000) a looser 80% threshold applies.
    Emits a warning when the pending rate exceeds the threshold.
    """

    n_pos, n_neg, n_pending = row.positive, row.negative, row.pending
    n_tot = n_pos + n_neg

    # Guard against division by zero when there are no resolved results.
    percent_pending = 100.0 * n_pending / n_tot if n_tot > 0 else 0.0

    # DRY FIX: the original duplicated the identical warning call in both
    # branches; only the threshold differs by sample size.
    threshold = 20.0 if n_tot > 1000 else 80.0
    if percent_pending > threshold:
        log.warning(row.state, f"too many pending {percent_pending:.0f}% (pending={n_pending:,}, total={n_tot:,})")
def increasing_values(row, df: pd.DataFrame, log: ResultLog):
    """Check that new values are not less than previous values.

    df contains the historical values (newest first).

    Per metric column, checks that:
      * the new value is not below the prior value (error)
      * the value has not been stuck unchanged for days (error/warning)
      * the day-over-day percent increase is within expected bounds (warning)
    """

    # Restrict history to rows strictly before the target date.
    df = df[df.date < row.targetDate]

    dict_row = row._asdict()

    # Loop-invariant row attributes, hoisted out of the column loop.
    phase = row.phase
    checked_at = row.lastCheckEt.to_pydatetime()
    is_check_field_set = checked_at > START_OF_TIME

    for c in ["positive", "negative", "death", "total"]:
        val = dict_row[c]
        vec = df[c].values
        # No history at all -> treat the prior value as 0.
        prev_val = vec[0] if vec.size > 0 else 0

        if val < prev_val:
            log.error(row.state, f"{c} value ({val:,}) is less than prior value ({prev_val:,})")
            # NOTE(review): intentionally no `continue` here — a decrease also
            # falls through to the percent-change warning below, matching the
            # original behavior; confirm whether double-logging is intended.

        # allow value to be the same if below a threshold
        if val < IGNORE_THRESHOLDS[c]:
            continue

        if val == prev_val:
            n_days, d = days_since_change(val, df[c], df["date"])
            if n_days >= 0:
                d = str(d)
                d = d[4:6] + "/" + d[6:8]  # yyyymmdd -> mm/dd
                # A stale value is an error when the row was actually checked
                # (or is in an active phase); otherwise just a warning.
                if prev_val >= 20 and (is_check_field_set or phase in ["publish", "update"]):
                    log.error(row.state, f"{c} value ({val:,}) has not changed since {d} ({n_days} days)")
                else:
                    log.warning(row.state, f"{c} value ({val:,}) has not changed since {d} ({n_days} days)")
            else:
                log.error(row.state, f"{c} value ({val:,}) constant for all time")
            continue

        # BUG FIX: the original divided by prev_val unconditionally, raising
        # ZeroDivisionError when there was no prior value but val exceeded
        # the ignore threshold. A percent increase from 0 is undefined; skip.
        if prev_val == 0:
            continue

        p_observed = 100.0 * val / prev_val - 100.0

        #TODO: estimate expected increase from recent history
        p_min, p_max = EXPECTED_PERCENT_THRESHOLDS[c]
        if p_observed < p_min or p_observed > p_max:
            log.warning(row.state, f"{c} value ({val:,}) is a {p_observed:.0f}% increase, expected: {p_min:.0f} to {p_max:.0f}%")