Example No. 1
def next_missing_day(source, signals):
    """Fetch the first day for which we want to generate new data."""
    meta_df = covidcast.metadata()
    meta_df = meta_df[meta_df["data_source"] == source]
    meta_df = meta_df[meta_df["signal"].isin(signals)]
    # min: use the max_time of the most lagged signal, in case they differ
    # +timedelta: the subsequent day is the first day of new data to generate
    day = min(meta_df["max_time"]) + timedelta(days=1)
    return day
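A minimal usage sketch for next_missing_day, assuming the covidcast package is installed and the function lives in the same module; the data source and signal names below are illustrative placeholders, not part of the original example.

import covidcast
from datetime import timedelta

# Hypothetical inputs: a COVIDcast data source and two of its signals.
first_new_day = next_missing_day(
    "jhu-csse", ["confirmed_incidence_num", "deaths_incidence_num"])
print("First day of new data to generate:", first_new_day)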
Example No. 2
def main(args):
    """Generate data for the signal dashboard.

    `args`: parsed command-line arguments
    """
    log_file = None
    if args:
        log_file = args.log_file

    logger = get_structured_logger("signal_dash_data_generator",
                                   filename=log_file,
                                   log_exceptions=False)
    start_time = time.time()

    database = Database()

    signals_to_generate = database.get_enabled_signals()
    logger.info(
        "Starting generating dashboard data.",
        enabled_signals=[signal.name for signal in signals_to_generate])

    metadata = covidcast.metadata()

    signal_status_list: List[DashboardSignalStatus] = []
    coverage_list: List[DashboardSignalCoverage] = []

    for dashboard_signal in signals_to_generate:
        latest_issue = get_latest_issue_from_metadata(dashboard_signal,
                                                      metadata)
        latest_time_value = get_latest_time_value_from_metadata(
            dashboard_signal, metadata)
        latest_coverage = get_coverage(dashboard_signal, metadata)

        signal_status_list.append(
            DashboardSignalStatus(signal_id=dashboard_signal.db_id,
                                  date=datetime.date.today(),
                                  latest_issue=latest_issue,
                                  latest_time_value=latest_time_value))
        coverage_list.extend(latest_coverage)

    try:
        database.write_status(signal_status_list)
        logger.info("Wrote status.", rowcount=database.rowcount())
    except mysql.connector.Error as exception:
        logger.exception(exception)

    try:
        database.write_coverage(coverage_list)
        logger.info("Wrote coverage.", rowcount=database.rowcount())
    except mysql.connector.Error as exception:
        logger.exception(exception)

    logger.info("Generated signal dashboard data",
                total_runtime_in_seconds=round(time.time() - start_time, 2))
    return True
Example No. 3
def get_geo_signal_combos(data_source):
    """
    Get the list of geo type-signal type combinations we expect to see, based on
    the combinations reported as available by COVIDcast metadata.
    """
    meta = covidcast.metadata()
    source_meta = meta[meta['data_source'] == data_source]
    # Need to convert np.records to tuples so they are hashable and can be used in sets and dicts.
    geo_signal_combos = list(
        map(tuple, source_meta[["geo_type",
                                "signal"]].to_records(index=False)))

    print("Number of expected geo region-signal combinations:",
          len(geo_signal_combos))

    return geo_signal_combos
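An illustrative call, assuming covidcast is importable; the data source name is a placeholder:

import covidcast

combos = get_geo_signal_combos("jhu-csse")
for geo_type, signal in combos[:5]:
    print(geo_type, signal)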
Example No. 4
def get_geo_signal_combos(data_source):
    """
    Get the list of geo type-signal type combinations we expect to see.

    Cross-references against the combinations reported as available by COVIDcast metadata.
    """
    # Maps a data_source name to its name(s) in the API; a list covers sources with multiple names
    source_signal_mappings = {
        'chng': ['chng-cli', 'chng-covid'],
        'indicator-combination': ['indicator-combination-cases-deaths'],
        'quidel': ['quidel-covid-ag'],
        'safegraph': ['safegraph-weekly']
    }
    meta = covidcast.metadata()
    source_meta = meta[meta['data_source'] == data_source]
    # Need to convert np.records to tuples so they are hashable and can be used in sets and dicts.
    geo_signal_combos = list(map(tuple,
                                 source_meta[["geo_type", "signal"]].to_records(index=False)))
    # Only add new geo_sig combos if status is active
    new_geo_signal_combos = []
    # Use a seen dict to save on multiple calls:
    # True/False indicate if status is active, "unknown" means we should check
    sig_combo_seen = dict()
    for combo in geo_signal_combos:
        src_list = source_signal_mappings.get(data_source, [data_source])
        for src in src_list:
            sig = combo[1]
            geo_status = sig_combo_seen.get((sig, src), "unknown")
            if geo_status is True:
                new_geo_signal_combos.append(combo)
            elif geo_status == "unknown":
                epidata_signal = requests.get(
                    "https://api.covidcast.cmu.edu/epidata/covidcast/meta",
                    params={'signal': f"{src}:{sig}"})
                active_status = [
                    val['active']
                    for i in epidata_signal.json()
                    for val in i['signals']
                ]
                # An empty response means the signal is not active
                if active_status == []:
                    sig_combo_seen[(sig, src)] = False
                    continue
                sig_combo_seen[(sig, src)] = active_status[0]
                if active_status[0] is True:
                    new_geo_signal_combos.append(combo)
    return new_geo_signal_combos
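Compared with the previous example, this variant cross-checks every (signal, source) pair against the Epidata meta endpoint and keeps only combinations whose signal is still active; the sig_combo_seen dict caches each lookup so repeated pairs do not trigger extra HTTP requests.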
Example No. 5
def public_signal(signal):
    """Check if the signal name is already public using COVIDcast.

    Parameters
    ----------
    signal : str
        Name of the signal.

    Returns
    -------
    bool
        True if the signal is present, False otherwise.
    """
    epidata_df = covidcast.metadata()
    # Membership test on the signal column; equivalent to scanning each row.
    return signal in epidata_df["signal"].values
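An illustrative check, assuming the covidcast package is available; the signal name is a placeholder:

import covidcast

if public_signal("confirmed_incidence_num"):
    print("Signal is already public in COVIDcast")
else:
    print("Signal not found in COVIDcast metadata")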
Example No. 6
def run_module():
    start_time = time.time()
    params = read_params()
    meta = covidcast.metadata()
    slack_notifier = None
    if "channel" in params and "slack_token" in params:
        slack_notifier = SlackNotifier(params["channel"],
                                       params["slack_token"])

    complaints = []
    for data_source in params["sources"].keys():
        complaints.extend(
            check_source(data_source, meta, params["sources"],
                         params.get("grace", 0), LOGGER))

    if len(complaints) > 0:
        report_complaints(complaints, slack_notifier)

    elapsed_time_in_seconds = round(time.time() - start_time, 2)
    LOGGER.info("Completed indicator run",
                elapsed_time_in_seconds=elapsed_time_in_seconds)
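A hypothetical sketch of the params structure this module reads; the keys mirror the lookups in the code above, while the values are placeholders and the per-source settings consumed by check_source are not spelled out:

params = {
    "channel": "#covidcast-monitoring",  # Slack channel to notify (placeholder)
    "slack_token": "xoxb-...",           # Slack API token (placeholder)
    "grace": 1,                          # grace period forwarded to check_source
    "sources": {
        "jhu-csse": {},  # per-source settings expected by check_source
    },
}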
Example No. 7
def run_module():
    params = read_params()
    meta = covidcast.metadata()

    complaints = []
    for data_source in params["sources"].keys():
        complaints.extend(
            check_source(data_source, meta, params["sources"],
                         params.get("grace", 0), LOGGER))

    if len(complaints) > 0:
        for complaint in complaints:
            LOGGER.critical(
                event="signal out of SLA",
                message=complaint.message,
                data_source=complaint.data_source,
                signal=complaint.signal,
                geo_types=complaint.geo_types,
                last_updated=complaint.last_updated.strftime("%Y-%m-%d"))

        report_complaints(complaints, params)

        sys.exit(1)
Example No. 8
def _fetch_meta(self):
    """Fetch COVIDcast metadata."""
    metadata = covidcast.metadata()
    return metadata