class IfStateCases(InfluxBase):
    """Per-state day-over-day case deltas, with an InfluxDB writer.

    On construction every state's records are fetched from ``IfStateData``
    and, for each state, the difference between the totals of consecutive
    dates is stored.  The result is a nested dict::

        {state_name: {sortable_date: {"value": int,
                                      "population": str,
                                      "epoch_date": <copied from input>}}}

    The earliest date of each state has no predecessor and therefore gets
    no entry.
    """

    # Class-level defaults are kept so attribute lookups on the class keep
    # working; __init__ rebinds every one of them on the instance.
    all_state_daily_cases = {}
    influx_api = None
    state_populations = None
    if_state_data = None

    def __init__(self):
        """Load all state data and compute the daily deltas for each state."""
        super().__init__("daily_cases")
        self.state_populations = StatePopulation()
        self.if_state_data = IfStateData()
        self.influx_api = InfluxApi()
        # A fresh dict per instance: mutating the class-level dict through
        # ``self`` would silently share results between all instances.
        self.all_state_daily_cases = {}

        all_state_data = self.if_state_data.get_all_state_data()
        for state_name, state_data in all_state_data.items():
            self.all_state_daily_cases[state_name] = (
                self._daily_cases_for_state(state_name, state_data))

    def _daily_cases_for_state(self, state_name, state_data):
        """Return ``{sortable_date: record}`` of day-over-day deltas.

        ``state_data`` maps sortable dates to row dicts carrying at least
        the ``"active"`` and ``"epoch_date"`` keys.
        """
        daily_cases = {}
        sortable_dates = sorted(state_data)
        if not sortable_dates:
            return daily_cases

        # NOTE(review): the running total is read from the "active" column
        # even though the variables treat it as a cumulative count --
        # confirm whether "confirmed" was intended.
        previous_total = int(state_data[sortable_dates[0]]["active"])

        later_dates = sortable_dates[1:]
        if later_dates:
            # The population does not depend on the date, so look it up
            # once per state instead of once per row.
            population = str(
                self.state_populations.get_state_population(state_name))

        for sortable_date in later_dates:
            row = state_data[sortable_date]
            current_total = int(row["active"])
            daily_cases[sortable_date] = {
                "value": current_total - previous_total,
                "population": population,
                "epoch_date": row["epoch_date"],
            }
            previous_total = current_total
        return daily_cases

    def get_all_state_daily_cases(self):
        """Return the nested ``{state: {date: record}}`` dict."""
        return self.all_state_daily_cases

    def add_all_state_cases_to_influxdb(self):
        """Write one ``daily_cases`` point per state and date.

        Each point is assembled as a line of the form
        ``daily_cases,name=<state> population=<p>,value=<v> <epoch_date>``
        and handed to ``self.influx_api.write``.
        """
        for state_name, state_daily_cases in (
                self.all_state_daily_cases.items()):
            canonical_name = StringUtil.canonical(state_name)
            for sortable_date in sorted(state_daily_cases):
                record = state_daily_cases[sortable_date]
                time_series = "".join([
                    "daily_cases,",
                    "name=" + canonical_name + " ",
                    "population=" + str(record["population"]) + ",",
                    "value=" + str(record["value"]) + " ",
                    # presumably already a string; it is concatenated
                    # without conversion, exactly as before
                    record["epoch_date"],
                ])
                self.influx_api.write(time_series)
class IfStateData(InfluxBase):
    """Parse the daily US report CSV files into per-state records.

    Every ``*.csv`` file under ``data_dir`` is read on construction.  Rows
    whose country column is ``"US"``, whose last-update column is not empty
    and whose state column is not ``"Recovered"`` are stored as::

        {state_name: {sortable_date: row_dict}}

    where ``sortable_date`` is derived from the file name by
    ``file_util.file_to_sortable_date``.
    """

    data_dir = "/var/lib/covid/data/COVID-19/csse_covid_19_data/csse_covid_19_daily_reports_us/"

    # Record key for each CSV column, by position.  Column 8 ("active") is
    # converted to an int after the positional mapping.  The misspelt
    # "hopitalization_rate" is kept as-is because it is also the field name
    # written to InfluxDB.
    _CSV_COLUMNS = (
        "state",
        "country",
        "last_update",
        "lat",
        "long",
        "confirmed",
        "cum_deaths",
        "recovered",
        "active",
        "fips",
        "incident_rate",
        "people_tested",
        "people_hospitalized",
        "mortality_rate",
        "uid",
        "iso3",
        "testing_rate",
        "hopitalization_rate",
    )

    # Class-level defaults are kept so attribute lookups on the class keep
    # working; __init__ rebinds every one of them on the instance.
    all_state_data = {}
    influx_api = None
    input_files = None
    state_populations = None

    def __init__(self):
        """Read every input file and populate ``self.all_state_data``."""
        super().__init__("state_data")
        self.state_populations = StatePopulation()
        self.input_files = glob.glob(self.data_dir + "*.csv")
        self.influx_api = InfluxApi()
        # A fresh dict per instance: mutating the class-level dict through
        # ``self`` would silently share results between all instances.
        self.all_state_data = {}

        for input_file in self.input_files:
            self._load_file(input_file)

    def _load_file(self, input_file):
        """Add the matching rows of one CSV file to ``all_state_data``."""
        sortable_date = file_util.file_to_sortable_date(input_file)
        # Every row of a file shares the file's date, so convert it once.
        epoch_date = date_util.date_to_epoch(sortable_date)
        column_count = len(self._CSV_COLUMNS)

        # newline="" is what the csv module asks for so that newlines
        # embedded in quoted fields are handled correctly.
        with open(input_file, newline="", encoding="utf-8") as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=",")
            next(csv_reader, None)  # skip the header line
            for row in csv_reader:
                # Blank or truncated lines cannot be mapped by position;
                # skip them instead of failing with an IndexError.
                if len(row) < column_count:
                    continue
                if row[1] != "US" or row[2] == "" or row[0] == "Recovered":
                    continue

                state_name = row[0]
                state_row = dict(zip(self._CSV_COLUMNS, row))
                # An empty "active" cell counts as zero; otherwise the
                # value may be written like "12.0", hence int(float(...)).
                state_row["active"] = (
                    0 if row[8] == "" else int(float(row[8])))
                state_row["population"] = (
                    self.state_populations.get_state_population(state_name))
                state_row["epoch_date"] = epoch_date

                self.all_state_data.setdefault(
                    state_name, {})[sortable_date] = state_row

    def get_all_state_data(self):
        """Return the nested ``{state: {date: row_dict}}`` dict."""
        return self.all_state_data

    def add_all_state_data_to_influxdb(self):
        """Write the records of every state to InfluxDB."""
        for state_data in self.all_state_data.values():
            self.add_single_state_data_to_influxdb(state_data)

    def add_single_state_data_to_influxdb(self, state_data):
        """Write one ``state_data`` point per date of a single state.

        ``state_data`` maps sortable dates to the row dicts built in
        ``__init__``.  Each line carries the tags ``name`` and ``country``,
        then the field set, then the record's ``epoch_date``.
        """
        default_zero = StringUtil.default_zero
        for sortable_date in sorted(state_data):
            record = state_data[sortable_date]
            tags = ",".join([
                "name=" + StringUtil.canonical(record["state"]),
                "country=" + record["country"],
            ])
            fields = ",".join([
                "state=\"" + record["state"] + "\"",
                "population=" + str(record["population"]),
                "last_update=\"" + record["last_update"] + "\"",
                "lat=" + default_zero(record["lat"]),
                "long=" + default_zero(record["long"]),
                "confirmed=" + default_zero(record["confirmed"]),
                "cum_deaths=" + default_zero(record["cum_deaths"]),
                "recovered=" + default_zero(record["recovered"]),
                "active=" + default_zero(record["active"]),
                # fips and uid are written verbatim, without a default
                "fips=" + record["fips"],
                "incident_rate=" + default_zero(record["incident_rate"]),
                "people_tested=" + default_zero(record["people_tested"]),
                "people_hospitalized=" + default_zero(
                    record["people_hospitalized"]),
                "mortality_rate=" + default_zero(record["mortality_rate"]),
                "uid=" + record["uid"],
                "iso3=\"" + record["iso3"] + "\"",
                "testing_rate=" + default_zero(record["testing_rate"]),
                "hopitalization_rate=" + default_zero(
                    record["hopitalization_rate"]),
            ])
            time_series = (
                "state_data," + tags + " " + fields + " "
                + record["epoch_date"])
            self.influx_api.write(time_series)
class IfStateAvg7Days(InfluxBase):
    """Seven-day mean of daily deaths per state, compared with the trend.

    For each state the mean of the last seven daily-death values is
    computed and compared with the value of the state's trend line (from
    ``IfStateTrend``) at the fourth-from-last date, i.e. the middle of the
    seven-day window.  Results are stored as::

        {state_name: {"mean_deaths": ...,
                      "fourth_from_last_trend_value": ...,
                      "fourth_from_last_delta": ...,
                      "fourth_from_last_delta_percent": ...,
                      "epoch_date": ...}}
    """

    # Class-level defaults are kept so attribute lookups on the class keep
    # working; __init__ rebinds every one of them on the instance.
    if_state_mortality = None
    if_state_trend = None
    influx_api = None
    all_state_avgs = {}

    def __init__(self):
        """Compute the seven-day averages for every state.

        Raises:
            IndexError: if a state has fewer than four daily records.
        """
        super().__init__("daily_deaths_seven_day_avg")
        self.if_state_mortality = IfStateMortality()
        self.if_state_trend = IfStateTrend()
        self.influx_api = InfluxApi()
        # A fresh dict per instance: mutating the class-level dict through
        # ``self`` would silently share results between all instances.
        self.all_state_avgs = {}

        all_state_daily_deaths = (
            self.if_state_mortality.get_all_state_daily_deaths())
        # Fetched once; the trends do not change while we iterate.
        all_state_trends = self.if_state_trend.get_all_state_trends()

        for state_name, state_daily_death in all_state_daily_deaths.items():
            last_7_state_data = self.get_last_seven(state_daily_death)
            mean_7_day_deaths = self.mean_from_state_list(
                last_7_state_data, "value")

            fourth_from_last_key = self.get_fourth_from_last_key(
                state_daily_death)
            fourth_from_last_epoch = (
                state_daily_death[fourth_from_last_key]["epoch_date"])

            state_trend = all_state_trends[state_name]
            fourth_from_last_trend_value = self.if_state_trend.get_y_for_x(
                fourth_from_last_epoch,
                state_trend["slope"],
                state_trend["y_intercept"])
            fourth_from_last_delta = (
                mean_7_day_deaths - fourth_from_last_trend_value)

            if fourth_from_last_trend_value <= 0:
                # A ratio against a trend value at or below zero is
                # meaningless, so fall back to reporting the mean itself.
                mean_vs_trend_percent_delta = mean_7_day_deaths
            else:
                mean_vs_trend_percent_delta = (
                    fourth_from_last_delta / fourth_from_last_trend_value)

            self.all_state_avgs[state_name] = {
                "mean_deaths": mean_7_day_deaths,
                "fourth_from_last_trend_value": fourth_from_last_trend_value,
                "fourth_from_last_delta": fourth_from_last_delta,
                "fourth_from_last_delta_percent": mean_vs_trend_percent_delta,
                "epoch_date": fourth_from_last_epoch,
            }

    def get_all_state_avgs(self):
        """Return the ``{state: averages}`` dict built by ``__init__``."""
        return self.all_state_avgs

    def add_state_avg_7_day_to_influxdb(self):
        """Write one ``daily_deaths_seven_day_avg`` point per state.

        The percentage delta is stored in ``all_state_avgs`` but is not
        part of the written line.
        """
        for state_name, state_avgs in self.all_state_avgs.items():
            time_series = "".join([
                "daily_deaths_seven_day_avg,",
                "name=" + StringUtil.canonical(state_name) + " ",
                "mean_deaths=" + str(state_avgs["mean_deaths"]) + ",",
                "fourth_from_last_trend_value="
                + str(state_avgs["fourth_from_last_trend_value"]) + ",",
                "fourth_from_last_delta="
                + str(state_avgs["fourth_from_last_delta"]) + " ",
                str(state_avgs["epoch_date"]),
            ])
            self.influx_api.write(time_series)

    def mean_from_state_list(self, state_list, key):
        """Return the mean of ``int(record[key])`` over all records.

        Raises:
            ZeroDivisionError: if ``state_list`` is empty.
        """
        total = sum(int(record[key]) for record in state_list.values())
        return total / len(state_list)

    def slope_from_state_list(self, state_list, x_key, y_key, x_mean, y_mean):
        """Return the least-squares slope of ``y_key`` over ``x_key``.

        Returns 0 when all x values equal ``x_mean`` (zero denominator),
        which includes the empty case.
        """
        x_center = float(x_mean)
        y_center = float(y_mean)
        numerator = 0
        denominator = 0
        for record in state_list.values():
            x_offset = float(record[x_key]) - x_center
            y_offset = float(record[y_key]) - y_center
            numerator += x_offset * y_offset
            denominator += x_offset * x_offset
        if denominator == 0:
            return 0
        return numerator / denominator

    def get_y_intercept(self, x_mean, y_mean, slope):
        """Return the intercept of the line through the mean point."""
        return y_mean - (slope * x_mean)

    def get_y_for_x(self, x, slope, y_intercept):
        """Evaluate ``slope * x + y_intercept`` using floats."""
        return (float(slope) * float(x)) + float(y_intercept)

    def get_last_seven(self, state_daily_deaths):
        """Return the records of the (up to) seven greatest keys."""
        return {
            key: state_daily_deaths[key]
            for key in sorted(state_daily_deaths)[-7:]
        }

    def get_fourth_from_last_key(self, state_daily_deaths):
        """Return the fourth-greatest key, the middle of a 7-day window.

        Raises:
            IndexError: if there are fewer than four keys.
        """
        return sorted(state_daily_deaths)[-4]
class IfStateTrend7Days(InfluxBase):
    """Linear trend of daily deaths over each state's last seven records.

    For every state a least-squares line is fitted through the
    ``(epoch_date, value)`` pairs of the last seven daily-death records.
    The line's parameters and its values at the first and last date of the
    window are stored as::

        {state_name: {"mean_deaths", "mean_epoch", "slope", "y_intercept",
                      "min_sortable_date", "min_epoch", "y_min",
                      "max_sortable_date", "max_epoch", "y_max"}}
    """

    # Class-level defaults are kept so attribute lookups on the class keep
    # working; __init__ rebinds every one of them on the instance.
    if_state_mortality = None
    influx_api = None
    all_state_trends = {}

    def __init__(self):
        """Fit the seven-day trend line for every state."""
        super().__init__("trend_daily_deaths_seven_day")
        self.if_state_mortality = IfStateMortality()
        self.influx_api = InfluxApi()
        # A fresh dict per instance: mutating the class-level dict through
        # ``self`` would silently share results between all instances.
        self.all_state_trends = {}

        all_state_daily_deaths = (
            self.if_state_mortality.get_all_state_daily_deaths())
        for state_name, state_data in all_state_daily_deaths.items():
            window = self.get_last_seven(state_data)

            mean_deaths = self.mean_from_state_list(window, "value")
            mean_epoch = self.mean_from_state_list(window, "epoch_date")
            slope = self.slope_from_state_list(
                window, "epoch_date", "value", mean_epoch, mean_deaths)
            y_intercept = self.get_y_intercept(mean_epoch, mean_deaths, slope)

            # The end points of the fitted line, at the first and last
            # date of the window.
            min_sortable_date = min(window)
            max_sortable_date = max(window)
            min_epoch = window[min_sortable_date]["epoch_date"]
            max_epoch = window[max_sortable_date]["epoch_date"]
            y_min = self.get_y_for_x(min_epoch, slope, y_intercept)
            y_max = self.get_y_for_x(max_epoch, slope, y_intercept)

            self.all_state_trends[state_name] = {
                "mean_deaths": mean_deaths,
                "mean_epoch": mean_epoch,
                "slope": slope,
                "y_intercept": y_intercept,
                "min_sortable_date": min_sortable_date,
                "min_epoch": min_epoch,
                "y_min": y_min,
                "max_sortable_date": max_sortable_date,
                "max_epoch": max_epoch,
                "y_max": y_max,
            }

    def get_all_state_trends(self):
        """Return the ``{state: trend}`` dict built by ``__init__``."""
        return self.all_state_trends

    def add_state_trends_to_influxdb(self):
        """Write the two end points of each state's trend line.

        Two ``trend_daily_deaths_seven_day`` points are written per state:
        first ``y_min`` at ``min_epoch``, then ``y_max`` at ``max_epoch``.
        """
        for state_name, state_trends in self.all_state_trends.items():
            for value_key, epoch_key in (("y_min", "min_epoch"),
                                         ("y_max", "max_epoch")):
                time_series = "".join([
                    "trend_daily_deaths_seven_day,",
                    "name=" + StringUtil.canonical(state_name) + " ",
                    "value=" + str(state_trends[value_key]) + " ",
                    # presumably already a string; it is concatenated
                    # without conversion, exactly as before
                    state_trends[epoch_key],
                ])
                self.influx_api.write(time_series)

    def mean_from_state_list(self, state_list, key):
        """Return the mean of ``int(record[key])`` over all records.

        Raises:
            ZeroDivisionError: if ``state_list`` is empty.
        """
        total = sum(int(record[key]) for record in state_list.values())
        return total / len(state_list)

    def slope_from_state_list(self, state_list, x_key, y_key, x_mean, y_mean):
        """Return the least-squares slope of ``y_key`` over ``x_key``.

        Returns 0 when all x values equal ``x_mean`` (zero denominator),
        which includes the empty case.
        """
        x_center = float(x_mean)
        y_center = float(y_mean)
        numerator = 0
        denominator = 0
        for record in state_list.values():
            x_offset = float(record[x_key]) - x_center
            y_offset = float(record[y_key]) - y_center
            numerator += x_offset * y_offset
            denominator += x_offset * x_offset
        if denominator == 0:
            return 0
        return numerator / denominator

    def get_y_intercept(self, x_mean, y_mean, slope):
        """Return the intercept of the line through the mean point."""
        return y_mean - (slope * x_mean)

    def get_y_for_x(self, x, slope, y_intercept):
        """Evaluate ``slope * x + y_intercept`` using floats."""
        return (float(slope) * float(x)) + float(y_intercept)

    def get_last_seven(self, state_daily_deaths):
        """Return the records of the (up to) seven greatest keys."""
        return {
            key: state_daily_deaths[key]
            for key in sorted(state_daily_deaths)[-7:]
        }
class IfStateTrendDelta(InfluxBase):
    """Compare each state's seven-day trend with its full-history trend.

    For every state both trend lines are evaluated at the fourth-from-last
    daily-death record (the middle of the seven-day window) and the
    results are stored as::

        {state_name: {"slope_total", "slope_7_day", "minus_four_y",
                      "minus_four_y_7_day", "normalized_delta"}}
    """

    # Class-level default kept so attribute lookups on the class keep
    # working; __init__ rebinds it on the instance.
    all_state_deltas = {}

    def __init__(self):
        """Compute the trend comparison for every state.

        Raises:
            IndexError: if a state has fewer than four daily records.
        """
        super().__init__("delta_daily_deaths")
        if_state_mortality = IfStateMortality()
        if_state_trend = IfStateTrend()
        if_state_trend_7_days = IfStateTrend7Days()
        self.influx_api = InfluxApi()
        # A fresh dict per instance: mutating the class-level dict through
        # ``self`` would silently share results between all instances.
        self.all_state_deltas = {}

        all_state_daily_deaths = if_state_mortality.get_all_state_daily_deaths()
        all_state_trends = if_state_trend.get_all_state_trends()
        all_state_trends_7_days = if_state_trend_7_days.get_all_state_trends()
        # add_state_deltas_to_influxdb() reads the full trends from the
        # instance; without this assignment it fails with AttributeError.
        self.all_state_trends = all_state_trends

        for state_name in all_state_trends:
            print("state: " + state_name)
            state_trends = all_state_trends[state_name]
            state_trends_7_days = all_state_trends_7_days[state_name]
            state_daily_deaths = all_state_daily_deaths[state_name]

            # The fourth-from-last record is the middle of the seven-day
            # window; both trend lines are evaluated on that same day so
            # their heights can be compared.
            daily_deaths_minus_four_key = self.get_fourth_from_last_key(
                state_daily_deaths)
            daily_death_minus_four = (
                state_daily_deaths[daily_deaths_minus_four_key])

            trend_full_minus_four_y = if_state_trend.get_y_for_x(
                daily_death_minus_four["epoch_date"],
                state_trends["slope"],
                state_trends["y_intercept"])
            trend_7_days_minus_four_y = if_state_trend_7_days.get_y_for_x(
                daily_death_minus_four["epoch_date"],
                state_trends_7_days["slope"],
                state_trends_7_days["y_intercept"])

            # "population" must be a string key; the bare name
            # ``population`` is undefined and raised NameError.
            # presumably every daily-death record carries that key --
            # verify against IfStateMortality.
            normalized_delta = self.calculate_normalized_delta(
                state_trends["slope"],
                state_trends_7_days["slope"],
                trend_full_minus_four_y,
                trend_7_days_minus_four_y,
                daily_death_minus_four["population"])

            self.all_state_deltas[state_name] = {
                "slope_total": state_trends["slope"],
                "slope_7_day": state_trends_7_days["slope"],
                "minus_four_y": trend_full_minus_four_y,
                "minus_four_y_7_day": trend_7_days_minus_four_y,
                "normalized_delta": normalized_delta,
            }

    def get_all_state_deltas(self):
        """Return the ``{state: deltas}`` dict built by ``__init__``."""
        return self.all_state_deltas

    def add_state_deltas_to_influxdb(self):
        """Write the end points of each state's full trend line.

        NOTE(review): this writes ``trend_daily_deaths`` points taken from
        the full trend (``y_min``/``y_max``), not the values held in
        ``all_state_deltas`` -- it looks copied from a trend writer.
        Confirm which measurement and fields are intended.
        """
        for state_name in self.all_state_deltas:
            state_trends = self.all_state_trends[state_name]
            for value_key, epoch_key in (("y_min", "min_epoch"),
                                         ("y_max", "max_epoch")):
                time_series = "".join([
                    "trend_daily_deaths,",
                    "name=" + StringUtil.canonical(state_name) + " ",
                    "value=" + str(state_trends[value_key]) + " ",
                    state_trends[epoch_key],
                ])
                self.influx_api.write(time_series)

    def get_fourth_from_last_key(self, state_daily_deaths):
        """Return the fourth-greatest key of ``state_daily_deaths``.

        The fourth-from-last record is the middle of the seven-day window,
        which is where the seven-day trend is compared with the full trend.

        Raises:
            IndexError: if there are fewer than four keys.
        """
        sorted_keys = sorted(state_daily_deaths.keys())
        return sorted_keys[-4]

    def calculate_normalized_delta(self, slope_total, slope_7_day,
                                   minus_four_y, minus_four_y_7_day,
                                   population):
        """Compare the seven-day trend with the full trend.

        Computes the relative height difference of the two trend lines on
        the same day and the difference of their slopes, and prints both.
        When the full-trend height is exactly zero, ``.0001`` is used as
        the divisor to avoid a division by zero.

        NOTE(review): within the visible source this function has no
        ``return`` statement and never uses ``population``, so the caller
        stores ``None`` as ``normalized_delta`` -- confirm whether the
        computation is unfinished.
        """
        if minus_four_y == 0:
            percent_height_diff = (minus_four_y_7_day - minus_four_y) / .0001
        else:
            percent_height_diff = (
                (minus_four_y_7_day - minus_four_y) / minus_four_y)
        slope_diff = slope_7_day - slope_total
        print("total slope: " + str(slope_total) + ", slope 7 day; "
              + str(slope_7_day))
        print("slope diff: " + str(slope_diff))
        print("y total: " + str(minus_four_y) + ", 7 day y total: "
              + str(minus_four_y_7_day))
        print("percent height diff: " + str(percent_height_diff))