def next_missing_day(source, signals):
    """Fetch the first day for which we want to generate new data."""
    metadata = covidcast.metadata()
    relevant = metadata[
        (metadata["data_source"] == source) & (metadata["signal"].isin(signals))
    ]
    # min: use the max_time of the most lagged signal, in case they differ
    # +timedelta: the subsequent day is the first day of new data to generate
    return min(relevant["max_time"]) + timedelta(days=1)
def main(args):
    """Generate data for the signal dashboard.

    `args`: parsed command-line arguments
    """
    log_file = args.log_file if args else None
    logger = get_structured_logger(
        "signal_dash_data_generator", filename=log_file, log_exceptions=False)
    start_time = time.time()

    database = Database()
    signals_to_generate = database.get_enabled_signals()
    logger.info(
        "Starting generating dashboard data.",
        enabled_signals=[signal.name for signal in signals_to_generate])
    metadata = covidcast.metadata()

    signal_status_list: List[DashboardSignalStatus] = []
    coverage_list: List[DashboardSignalCoverage] = []

    # Collect one status row per enabled signal, plus its coverage rows.
    for dashboard_signal in signals_to_generate:
        latest_issue = get_latest_issue_from_metadata(dashboard_signal, metadata)
        latest_time_value = get_latest_time_value_from_metadata(
            dashboard_signal, metadata)
        latest_coverage = get_coverage(dashboard_signal, metadata)

        signal_status_list.append(
            DashboardSignalStatus(
                signal_id=dashboard_signal.db_id,
                date=datetime.date.today(),
                latest_issue=latest_issue,
                latest_time_value=latest_time_value))
        coverage_list.extend(latest_coverage)

    # Database errors are logged rather than fatal so one failed write does
    # not block the other.
    try:
        database.write_status(signal_status_list)
        logger.info("Wrote status.", rowcount=database.rowcount())
    except mysql.connector.Error as exception:
        logger.exception(exception)

    try:
        database.write_coverage(coverage_list)
        logger.info("Wrote coverage.", rowcount=database.rowcount())
    except mysql.connector.Error as exception:
        logger.exception(exception)

    logger.info(
        "Generated signal dashboard data",
        total_runtime_in_seconds=round(time.time() - start_time, 2))
    return True
def get_geo_signal_combos(data_source):
    """
    Get list of geo type-signal type combinations that we expect to see,
    based on combinations reported available by COVIDcast metadata.
    """
    metadata = covidcast.metadata()
    filtered = metadata[metadata['data_source'] == data_source]
    # np.records aren't hashable; tuples can be used in sets and dicts.
    geo_signal_combos = [
        tuple(record)
        for record in filtered[["geo_type", "signal"]].to_records(index=False)
    ]
    print("Number of expected geo region-signal combinations:",
          len(geo_signal_combos))
    return geo_signal_combos
def get_geo_signal_combos(data_source):
    """Get list of geo type-signal type combinations that we expect to see.

    Cross references based on combinations reported available by COVIDcast
    metadata, keeping only combos whose signal is marked active in the
    epidata meta endpoint.
    """
    # Maps data_source name with what's in the API, lists used in case of
    # multiple names
    source_signal_mappings = {
        'chng': ['chng-cli', 'chng-covid'],
        'indicator-combination': ['indicator-combination-cases-deaths'],
        'quidel': ['quidel-covid-ag'],
        'safegraph': ['safegraph-weekly']
    }
    meta = covidcast.metadata()
    source_meta = meta[meta['data_source'] == data_source]
    # Need to convert np.records to tuples so they are hashable and can be
    # used in sets and dicts.
    geo_signal_combos = list(
        map(tuple, source_meta[["geo_type", "signal"]].to_records(index=False)))

    # Only add new geo_sig combos if status is active
    new_geo_signal_combos = []
    # Use a seen dict to save on repeated API calls:
    # True/False indicate if status is active, "unknown" means we should check
    sig_combo_seen = dict()
    for combo in geo_signal_combos:
        if source_signal_mappings.get(data_source):
            src_list = source_signal_mappings.get(data_source)
        else:
            src_list = [data_source]
        for src in src_list:
            sig = combo[1]
            geo_status = sig_combo_seen.get((sig, src), "unknown")
            if geo_status is True:
                new_geo_signal_combos.append(combo)
                # Fix: stop after the first active source name; without the
                # break a combo was appended once per active source (e.g.
                # 'chng' maps to two API names), producing duplicates.
                break
            elif geo_status == "unknown":
                epidata_signal = requests.get(
                    "https://api.covidcast.cmu.edu/epidata/covidcast/meta",
                    params={'signal': f"{src}:{sig}"})
                # Not an active signal
                active_status = [val['active']
                                 for i in epidata_signal.json()
                                 for val in i['signals']]
                if active_status == []:
                    sig_combo_seen[(sig, src)] = False
                    continue
                sig_combo_seen[(sig, src)] = active_status[0]
                if active_status[0] is True:
                    new_geo_signal_combos.append(combo)
                    # Same duplicate-append fix as the cached branch above.
                    break
    return new_geo_signal_combos
def public_signal(signal):
    """Check if the signal name is already public using COVIDcast.

    Parameters
    ----------
    signal : str
        Name of the signal

    Returns
    -------
    bool
        True if the signal is present
        False if the signal is not present
    """
    epidata_df = covidcast.metadata()
    # Vectorized membership test. The original positional loop
    # (epidata_df["signal"][index]) assumed a default RangeIndex and would
    # raise KeyError on any filtered/reindexed frame; this form is also O(n)
    # at C speed instead of a Python-level loop.
    return bool((epidata_df["signal"] == signal).any())
def run_module():
    """Check every configured source for SLA violations and report them."""
    start_time = time.time()
    params = read_params()
    meta = covidcast.metadata()

    # Slack reporting is optional; enabled only when both settings exist.
    slack_notifier = None
    if "channel" in params and "slack_token" in params:
        slack_notifier = SlackNotifier(params["channel"], params["slack_token"])

    complaints = []
    for data_source in params["sources"]:
        complaints.extend(check_source(
            data_source, meta, params["sources"], params.get("grace", 0), LOGGER))

    if complaints:
        report_complaints(complaints, slack_notifier)

    LOGGER.info("Completed indicator run",
                elapsed_time_in_seconds=round(time.time() - start_time, 2))
def run_module():
    """Check sources for SLA violations; log, report, and exit(1) on any."""
    params = read_params()
    meta = covidcast.metadata()

    complaints = []
    for data_source in params["sources"]:
        complaints.extend(check_source(
            data_source, meta, params["sources"], params.get("grace", 0), LOGGER))

    if complaints:
        # Emit one structured critical log entry per violation before
        # reporting and signalling failure to the caller.
        for complaint in complaints:
            LOGGER.critical(
                event="signal out of SLA",
                message=complaint.message,
                data_source=complaint.data_source,
                signal=complaint.signal,
                geo_types=complaint.geo_types,
                last_updated=complaint.last_updated.strftime("%Y-%m-%d"))
        report_complaints(complaints, params)
        sys.exit(1)
def _fetch_meta(self):
    """Return the COVIDcast metadata table."""
    return covidcast.metadata()