Example #1
def find_last_change(val, vec_vals: pd.Series, vec_date) -> Tuple[int, Optional[datetime]]:
    """Scan the history (newest first) for the most recent row whose value differs from val.

    Returns that value and the Eastern-localized date it changed, or (0, None)
    if the value never changed in the recorded history.
    """
    vals = vec_vals.values
    for i in range(len(vals)):
        if vals[i] != val:
            # dates are stored as YYYYMMDD integers; parse via string slicing
            sdate = str(vec_date.values[i])
            d = datetime(int(sdate[0:4]), int(sdate[4:6]), int(sdate[6:8]))
            return vals[i], udatetime.naivedatetime_as_eastern(d)
    return 0, None
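
A minimal usage sketch (the sample data below is illustrative; as in the function above, dates are assumed to be YYYYMMDD integers ordered newest first):

# hypothetical history, newest first
vals = pd.Series([100, 100, 95, 90])
dates = pd.Series([20200410, 20200409, 20200408, 20200407])

# the current value (100) last differed on 4/8, when it was 95
changed_val, changed_date = find_last_change(100, vals, dates)
# changed_val == 95; changed_date is Eastern 2020-04-08 00:00:00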
Example #2
def increasing_values(row,
                      df: pd.DataFrame,
                      log: ResultLog,
                      config: QCConfig = None) -> bool:
    """Check that new values more than previous values

    df contains the historical values (newest first).  offset controls how many days to look back.
    consolidate lines if everything changed

    return False if it looks like we have no new data for this source so we can bypass other tests
    """

    if not config: config = QCConfig()

    df = df[df.date < row.targetDate]

    # row is a namedtuple; convert it so fields can be looked up by name
    dict_row = row._asdict()

    # localTime is an editable field that is supposed to be the last time the
    # data changed. last_updated is the same value but adjusted to eastern TZ.
    if "localTime" in dict_row:
        local_time = row.localTime
        # encode the date as a YYYYMMDD integer for easy comparison
        d_local = local_time.year * 10000 + local_time.month * 100 + local_time.day
    else:
        local_time = None
        d_local = 0

    last_updated = row.lastUpdateEt
    d_updated = last_updated.year * 10000 + last_updated.month * 100 + last_updated.day

    # target date of run
    s_target = str(row.targetDate)
    d_target = datetime(int(s_target[0:4]), int(s_target[4:6]),
                        int(s_target[6:8]))
    d_target = udatetime.naivedatetime_as_eastern(d_target)

    d_last_change = udatetime.naivedatetime_as_eastern(datetime(2020, 1, 1))

    debug = config.enable_debug

    if debug: logger.debug(f"check {row.state}")

    # cumulative fields to check, and the parallel short names used when the
    # messages are consolidated into a single line
    fieldList = [
        "positive", "negative", "death", "hospitalizedCumulative",
        "inIcuCumulative", "onVentilatorCumulative"
    ]
    displayList = [
        "positive", "negative", "death", "hospitalized", "icu", "ventilator"
    ]

    source_messages = []
    has_issues, consolidate, n_days, n_days_prev = False, True, -1, 0
    for c in fieldList:
        val = dict_row.get(c)
        if val is None:
            log.internal(row.state, f"{c} missing column")
            has_issues, consolidate = True, False
            if debug: logger.debug(f"  {c} missing column")
            continue
        if c not in df.columns:
            log.internal(row.state, f"{c} missing history column")
            has_issues, consolidate = True, False
            if debug: logger.debug(f"  {c} missing history column")
            continue

        vec = df[c].values

        prev_val = vec[0] if vec.size > 0 else 0
        prev_date = df["date"].iloc[0] if vec.size > 0 else 0

        # negative values indicate blank/errors
        if val < prev_val and val > 0 and prev_val != 0:
            if prev_date > 0:
                sd = str(prev_date)[4:]  # YYYYMMDD -> MMDD
                sd = sd[0:2] + "/" + sd[2:4]  # -> MM/DD
            else:
                sd = "-"
            log.data_quality(
                row.state,
                f"{c} ({val:,}) decreased from {prev_val:,} as-of {sd}")
            has_issues, consolidate = True, False
            if debug:
                logger.debug(
                    f"  {c} ({val:,}) decreased from {prev_val:,} as-of {sd}")
            continue

        # allow value to be the same if below a threshold (default 10)
        t = IGNORE_THRESHOLDS.get(c, 10)
        if val < t:
            if debug:
                logger.debug(
                    f"  {c} ({val:,}) is below threshold -> ignore 'same' check"
                )
            continue

        #phase = row.phase
        #checked_at = row.lastCheckEt.to_pydatetime()
        #is_check_field_set = checked_at > START_OF_TIME

        # -1000 is the sentinel for a source value that could not be parsed
        if val == -1000:
            log.data_entry(row.state,
                           f"{c} value cannot be converted to a number")
            has_issues, consolidate = True, False
            if debug: logger.debug(f"  {c} was not a number in source data")
            continue

        if val == prev_val:
            changed_val, changed_date = find_last_change(val, df[c], df["date"])

            if changed_date is None:
                # value never changed in the recorded history
                n_days = -1
            else:
                n_days = int(
                    (d_target - changed_date).total_seconds() // (60 * 60 * 24))
            if n_days >= 0:
                d_last_change = max(d_last_change, changed_date)

                source_messages.append(
                    f"{c} ({val:,}) hasn't changed since {changed_date.month}/{changed_date.day} ({n_days} days)"
                )

                # check if we can still consolidate results
                if n_days_prev == 0:
                    n_days_prev = n_days
                    if debug:
                        logger.debug(
                            f"  {c} ({val:,}) hasn't changed since {changed_date.month}/{changed_date.day} ({n_days} days)"
                        )
                elif n_days_prev == n_days:
                    if debug:
                        logger.debug(
                            f"  {c} ({val:,}) also hasn't changed since {changed_date.month}/{changed_date.day}"
                        )
                else:
                    consolidate = False
                    if debug:
                        logger.debug(
                            f"  {c} ({val:,}) hasn't changed since {changed_date.month}/{changed_date.day} ({n_days} days ago) -> force individual lines "
                        )
            else:
                # convert the oldest history date (a YYYYMMDD int) to a datetime
                sdate = str(df["date"].values[-1])
                d = datetime(int(sdate[0:4]), int(sdate[4:6]), int(sdate[6:8]))
                d_last_change = max(d_last_change,
                                    udatetime.naivedatetime_as_eastern(d))
                has_issues, consolidate = True, False
                log.data_source(row.state,
                                f"{c} ({val:,}) constant for all time")
                if debug:
                    logger.debug(
                        f"  {c} ({val:,}) constant -> force individual lines ")
        else:
            consolidate = False
            if debug:
                logger.debug(
                    f"  {c} ({val:,}) changed from {prev_val:,} on {prev_date}"
                )

    if len(source_messages) == 0:
        if debug:
            logger.debug(f"  no source messages -> has_issues={has_issues}")
        return has_issues

    # alert if local time appears to have been updated incorrectly
    if d_local != 0 and d_local != d_updated:
        sd = f"{d_last_change.month}/{d_last_change.day}"
        sd_local = f"{local_time.month}/{local_time.day} {local_time.hour:02}:{local_time.minute:02}"
        checker = row.checker
        if checker == "": checker = "??"
        log.data_entry(
            row.state,
            f"checker {checker} set local time (column V) to {sd_local} but values haven't changed since {sd} ({n_days:.0f} days ago)"
        )
        #has_issues = True
        if debug:
            logger.debug(
                f"  checker {checker} set local time (column V) to {sd_local} but values haven't changed since {sd} ({n_days:.0f} days ago)"
            )

    if consolidate:
        names = "/".join(displayList)
        log.data_source(
            row.state,
            f"cumulative values ({names}) haven't changed since {d_last_change.month}/{d_last_change.day} ({n_days:.0f} days)"
        )
        if debug:
            logger.debug(
                f"  cumulative values ({names}) haven't changed since {d_last_change.month}/{d_last_change.day} ({n_days:.0f} days)"
            )
    else:
        for m in source_messages:
            log.data_source(row.state, m)
        if debug:
            logger.debug(
                f"  {row.state}: record {len(source_messages)} source issue(s) to log"
            )
    return has_issues
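
A sketch of how a check like this might be driven; the driver loop, the working_df/history_df frames, and the QCConfig/ResultLog construction shown here are assumptions, not part of the source:

# hypothetical driver: working_df holds the current row per state,
# history_df holds each state's history (newest first)
log = ResultLog()
config = QCConfig()
for row in working_df.itertuples():
    state_history = history_df[history_df.state == row.state]
    if increasing_values(row, state_history, log, config=config):
        logger.warning(f"{row.state}: data issues recorded")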
Example #3
from datetime import datetime
from loguru import logger
import pandas as pd
import numpy as np
from typing import Optional, Tuple

from app.util import udatetime

from .qc_config import QCConfig
from .log.result_log import ResultLog
from .modeling.forecast import Forecast
from .modeling.forecast_plot import plot_to_file
from .modeling.forecast_io import save_forecast_hd5, load_forecast_hd5

START_OF_TIME = udatetime.naivedatetime_as_eastern(datetime(2020, 1, 2))
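
The checks above repeatedly convert YYYYMMDD integers to Eastern-localized datetimes; a small helper capturing that convention might look like this (hypothetical, not part of the original module):

def int_date_to_eastern(d_int: int) -> datetime:
    "parse a YYYYMMDD integer (e.g. 20200408) into an Eastern-localized datetime"
    s = str(d_int)
    d = datetime(int(s[0:4]), int(s[4:6]), int(s[6:8]))
    return udatetime.naivedatetime_as_eastern(d)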


def current_time_and_phase() -> Tuple[datetime, str]:
    "get the current time (ET) and phase of the process given the hour"

    target_time = udatetime.now_as_eastern()

    hour = target_time.hour

    # note -- these are all just guesses on a mental model of 1 update per day. Josh
    phase = ""
    if hour < 10:
        phase = "inactive"
    elif hour < 12 + 2:
        phase = "prepare"  # preparing for run