def get_country_stats(country_alpha: str, metric_type: str) -> pd.DataFrame:
    """
    Find the metric type data from the Johns Hopkins github csv file for a
    specific country and return it as a dataframe.

    :param: :country_alpha: :str: country alpha2 code (case-insensitive).
    :param: :metric_type: :str: currently only confirmed or death supported.
    :return: :pd.DataFrame: dataframe with a ``Date`` column and one titled
        metric column, summed over all provinces of the country.
    :raises: :ValueError: if the alpha2 code is not in ``country_dict``.
    :raises: :DataReadingError: if the upstream CSV cannot be fetched.
    """
    country_alpha = country_alpha.upper()
    metric_type = metric_type.lower()
    # Validate the cheap, local precondition before the slow network fetch.
    if country_alpha not in country_dict:
        raise ValueError(f"{country_alpha} not found in our dictionary.")
    try:
        df = parse_df(metric_type=metric_type)
    except Exception as exc:  # was a bare except; keep the cause chained
        raise DataReadingError("error accessing country data") from exc
    country = country_dict[country_alpha]
    df = df[df["Country/Region"] == country]
    df = df.drop(columns=["Lat", "Long", "Country/Region", "Province/State"])
    # Collapse all provinces into one national time series, then reshape the
    # resulting Series into a two-column (Date, metric) frame.
    df = df.sum(axis=0).to_frame().reset_index()
    df = df.rename(columns={0: metric_type.title(), "index": "Date"})
    return df.reset_index(drop=True)
def read_county_stats(state: str, county: str) -> Dict:
    """Return county-level stats records for a state/county pair.

    :param: :state: :str: two-letter state abbreviation.
    :param: :county: :str: county name as it appears in the data source.
    :return: :Dict: list of record dicts (``to_dict(orient="records")``).
    :raises: :DataReadingError: if either upstream CSV cannot be fetched.
    :raises: :DataValidationError: if the state/county combination is absent.
    """
    try:
        df = pd.read_csv(app_config.COUNTY_URL)
        deaths = pd.read_csv(app_config.STATE_DEATH)
    except Exception as exc:  # network/parse failure on either source
        raise DataReadingError(
            f"Data reading error State: {state}, and County: {county}."
        ) from exc
    try:
        # Normalize headers: lowercase, spaces -> underscores.
        df.columns = map(str.lower, df.columns)
        df.columns = df.columns.str.replace(" ", "_")
        # used data source 2 for new death number
        deaths = deaths[deaths['Province_State'] == reverse_states_map[state]]
        deaths = deaths[deaths['Admin2'] == county]
        # Day-over-day diff of the cumulative columns; last column is newest.
        deaths = deaths.iloc[:, 12:].diff(axis=1).iloc[:, -1].values[0]
        df = df[df["state_name"] == reverse_states_map[state]]
        df = df[df["county_name"] == county]
        # .loc on a single index label instead of chained `.new_death.iloc[0] =`,
        # which triggers pandas' SettingWithCopy pitfall and can silently no-op.
        df.loc[df.index[0], "new_death"] = deaths
        df = pd.DataFrame.to_dict(df, orient="records")
        if len(df) == 0:
            raise DataValidationError("county.py len(df) == 0")
    except Exception as exc:
        raise DataValidationError(
            f"Can't find State: {state}, and County: {county} combination."
        ) from exc
    return df
def read_county_stats(state: str, county: str) -> Dict:
    """Return county-level stats records, with territory/merged-county patches.

    :param: :state: :str: two-letter state/territory abbreviation.
    :param: :county: :str: county name; ignored for territories.
    :return: :Dict: list of record dicts (``to_dict(orient="records")``).
    :raises: :DataReadingError: if ingestion fails or the state is unknown.
    :raises: :DataValidationError: if no matching records exist.
    """
    try:
        df = ingest_county_data(url=app_config.COUNTY_URL)
    except Exception as exc:
        raise DataReadingError(
            f"Data reading error State: {state}, and County: {county}."
        ) from exc

    # 2020-04-22 patch: these county pairs are reported as one merged record.
    if (state == "WA") and (county in ["Benton", "Franklin"]):
        county = "Benton and Franklin"
    if (state == "MA") and (county in ["Dukes", "Nantucket"]):
        county = "Dukes and Nantucket"

    # 2020-04-26 patch: territories and districts have no county breakdown.
    territories = ["DC", "GU", "AS", "PR", "MP"]

    # Fetch state data. Only the map lookup can legitimately fail here; the
    # original bare except also swallowed the DataValidationError raised below
    # and re-raised it as a DataReadingError with the wrong message.
    full_state_name = state
    try:
        full_state_name = reverse_states_map[state]
    except KeyError as exc:
        raise DataReadingError(
            f"Can't find {full_state_name} in our database.") from exc
    df = df[df["state_name"] == full_state_name]
    if len(df) == 0:
        raise DataValidationError(
            f"No records found for {full_state_name} in our database.")

    # Now fetch county data
    if state in territories:
        df = df.reset_index(drop=True)
        # 2020-04-26 pandanmic: territories carry the state name as "county".
        df.loc[0, "county_name"] = full_state_name
    else:
        df = df[df["county_name"] == county]
        if len(df) == 0:
            raise DataValidationError(f"Can't find State: {full_state_name},"
                                      f" and County: {county} combination.")
    return pd.DataFrame.to_dict(df, orient="records")
def read_county_stats_zip_ny(zipcode: str) -> Dict:
    """Return stats for New York State zip_codes.

    :param: :zipcode: :str: zip code to resolve via the ``zipcodes`` package.
    :return: :Dict: single stats dict for the county containing the zip code.
    :raises: :DataReadingError: if either upstream CSV cannot be fetched.
    :raises: :DataValidationError: if the state/county combination is absent.
    """
    zip_info = zipcodes.matching(str(zipcode))[0]
    # zip_info["county"] is like "Kings County" -> strip the trailing word.
    county = zip_info["county"].rsplit(" ", 1)[0]
    state = zip_info["state"]
    try:
        deaths = pd.read_csv(app_config.STATE_DEATH)
        confirmed_df = pd.read_csv(app_config.STATE_CONFIRMED)
    except Exception as exc:
        raise DataReadingError(
            f"Data reading error State: {state}, and County: {county}."
        ) from exc
    try:
        state_name = reverse_states_map[state]
        confirmed_df = confirmed_df[
            confirmed_df["Province_State"] == state_name]
        confirmed_df = confirmed_df[confirmed_df["Admin2"] == county]
        confirmed = confirmed_df.iloc[:, -1]
        # 4/15/20: force cast into int before diff as pd sometimes reads as
        # float and throws nan.
        new_confirmed = (confirmed_df.iloc[:, 12:].astype("int32")
                         .diff(axis=1).iloc[:, -1].values[0])
        # used data source 2 for new death number
        deaths = deaths[deaths["Province_State"] == state_name]
        deaths = deaths[deaths["Admin2"] == county]
        death = deaths.iloc[:, -1]
        new_death = (deaths.iloc[:, 12:].astype("int32")
                     .diff(axis=1).iloc[:, -1].values[0])
        try:
            # NOTE(review): this is a raw ratio, not a percentage, yet it is
            # rendered with a "%" suffix below — confirm the intended format
            # with API consumers before changing it.
            fatality_rate = int(death) / int(confirmed)
        except Exception:  # pylint: disable=W0703
            fatality_rate = 0
        data = {
            "county_name": county,
            "state_name": state_name,
            "confirmed": int(confirmed),
            "new": int(new_confirmed),
            "death": int(death),
            "new_death": int(new_death),
            "fatality_rate": f"{fatality_rate}%",
            "latitude": float(zip_info["lat"]),
            "longitude": float(zip_info["long"]),
            "last_update": str("2020-04-17 19:50 EDT"),
        }
    except Exception as exc:
        raise DataValidationError(
            f"Can't find State: {state}, and County: {county} combination."
        ) from exc
    return data
def read_states(state: str) -> Dict:
    """read date, confirmed, and death info of a state and return it
    as a dictionary

    :param: :state: :str: two-letter state abbreviation.
    :return: :Dict: list of record dicts with Date/Confirmed/Deaths keys.
    :raises: :DataReadingError: if the NYT CSV cannot be read or filtered.
    """
    state = reverse_states_map[state]
    try:
        data = pd.read_csv(app_config.NYT_STATE)
        data = data[data['state'] == state]
        data = data[['date', 'cases', 'deaths']]
        data.columns = ['Date', 'Confirmed', 'Deaths']
        data = data.fillna(0)
        dict_data = pd.DataFrame.to_dict(data, orient="records")
        # Release the frame promptly; these CSVs are comparatively large.
        del data
        gc.collect()
    except Exception as exc:  # was a bare except; keep the cause chained
        raise DataReadingError("error reading data") from exc
    return dict_data
def parse_df(metric_type: str) -> pd.DataFrame:
    """
    Parse data in Johns Hopkins github csv file for the supported
    metric_type and return the dataframe to people.

    :param: :str: :metric_type: Currently only confirmed or death supported
    :return: :pd.DataFrame: dataframe of the queried data.
    :raises: :ValueError: for an unsupported metric type.
    :raises: :DataReadingError: if the CSV cannot be fetched or parsed.
    """
    # Normalize to the exact file-name fragments used by the upstream repo.
    if metric_type.startswith("confirmed"):
        metric_type = "confirmed"
    elif metric_type.startswith("death"):
        metric_type = "deaths"
    else:
        raise ValueError(f"{metric_type} metric type not supported")
    url = (
        "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
        "csse_covid_19_data/csse_covid_19_time_series/"
        f"time_series_covid19_{metric_type}_global.csv"
    )
    try:
        df = pd.read_csv(url)
    except Exception as exc:  # was a bare except; keep the cause chained
        raise DataReadingError("error accessing country data") from exc
    return df
def test_DataReadingError_with_message():
    """DataReadingError should prefix its message with the class name."""
    with pytest.raises(DataReadingError) as err:
        raise DataReadingError("with message")
    assert str(err.value) == "DataReadingError with message"
def get_daily_state_stats(state: str) -> Dict:
    """Get daily stats for a specific state, including tested, confirmed,
    todays_confirmed, deaths, and todays_deaths. Everything is initialized
    at zero.

    :params: :str: state. the state to look up.
    :return: :Dict: {"tested": str, "todays_tested": str, "confirmed": str,
        "todays_confirmed": str, "deaths": str, "todays_deaths: str}
    :raises: :DataReadingError: on API/CSV failure or parse errors.
    :raises: :DataValidationError: if the numbers fail the sanity checks.
    """
    # initialize the variables so it doesnt crash if both api call failed
    tested, todays_tested, confirmed = 0, 0, 0
    todays_confirmed, deaths, todays_deaths = 0, 0, 0

    # Get tested data
    URL = app_config.CVTRACK_STATES_URL
    response = requests.get(url=URL)
    if response.status_code != 200:
        raise DataReadingError("get_daily_state_stats data reading error")
    data = response.json()
    if isinstance(data, list):
        try:
            # The feed is newest-first: [0] is today, [1] is yesterday.
            state_rows = [d for d in data if d["state"] == state]
            curr = state_rows[0]
            prev = state_rows[1]
            todays_tested = (curr["totalTestResults"] -
                             prev["totalTestResults"])
            tested = curr["totalTestResults"]
        except Exception as ex:
            # BUGFIX: this previously caught only DataReadingError, which the
            # body never raises — KeyError/IndexError escaped unhandled.
            raise DataReadingError(f"error getting tested data {ex}") from ex
    else:
        tested, todays_tested = 0, 0

    # Get confirmed and deaths data
    try:
        base_url = app_config.COUNTY_URL
        df = pd.read_csv(base_url)
        df = df[df["State Name"] == reverse_states_map[state]]
        grouped = df.groupby(["State Name"])
        confirmed = grouped["Confirmed"].sum().values[0].astype(str)
        todays_confirmed = grouped["New"].sum().values[0].astype(str)
        deaths = grouped["Death"].sum().values[0].astype(str)
        todays_deaths = grouped["New Death"].sum().values[0].astype(str)
    except Exception as ex:
        # BUGFIX: same dead `except DataReadingError` pattern as above.
        raise DataReadingError(f"get_daily_state_stats parsing error {ex}") from ex

    stats = {
        "tested": tested,
        "todays_tested": todays_tested,
        "confirmed": confirmed,
        "todays_confirmed": todays_confirmed,
        "deaths": deaths,
        "todays_deaths": todays_deaths,
    }

    ###################################################################
    # Sanity Check
    ###################################################################
    if int(todays_tested) >= int(tested):
        raise DataValidationError("/stats tested number validation error")
    if int(todays_confirmed) >= int(confirmed):
        raise DataValidationError("/stats confirmed number validation error")
    if (int(confirmed) > int(tested)) or (int(deaths) > int(confirmed)):
        raise DataValidationError("/stats numbers comparison validation error")
    del df, data
    gc.collect()
    return stats
def get_daily_state_stats(state: str) -> Dict:
    """Get daily stats for a specific state, including tested, confirmed,
    todays_confirmed, deaths, and todays_deaths. Everything is initialized
    at zero.

    :params: :str: state. the state to look up.
    :return: :Dict: {"tested": str, "todays_tested": str, "confirmed": str,
        "todays_confirmed": str, "deaths": str, "todays_deaths: str}
    :raises: :DataReadingError: on API error json or parse failure.
    :raises: :DataValidationError: if the numbers fail the sanity checks.
    """
    # initialize the variables so it doesnt crash if both api call failed
    tested, todays_tested, confirmed = 0, 0, 0
    todays_confirmed, deaths, todays_deaths = 0, 0, 0

    URL = app_config.CVTRACK_STATES_URL + f"/daily?state={state}"
    response = requests.get(url=URL)
    if response.status_code == 200:
        # covidtracking api throws error json if request error {'error': }
        if isinstance(response.json(), list):
            try:
                # Newest-first feed: [0] is today, [1] is yesterday.
                data = response.json()
                curr = data[0]
                prev = data[1]
                todays_tested = (curr["totalTestResults"] -
                                 prev["totalTestResults"])
                tested = curr["totalTestResults"]
                todays_deaths = curr["deathIncrease"]
            except Exception as exc:  # was a bare except
                raise DataReadingError(
                    "get_daily_state_stats parsing error") from exc
            base_url = app_config.COUNTY_URL
            df = pd.read_csv(base_url)
            df = df[df["State Name"] == reverse_states_map[state]]
            grouped = df.groupby(["State Name"])
            confirmed = grouped["Confirmed"].sum().values[0].astype(str)
            todays_confirmed = grouped["New"].sum().values[0].astype(str)
            deaths = grouped["Death"].sum().values[0].astype(str)
        else:
            raise DataReadingError("get_daily_state_stats data reading error")

    stats = {
        "tested": tested,
        "todays_tested": todays_tested,
        "confirmed": confirmed,
        "todays_confirmed": todays_confirmed,
        "deaths": deaths,
        "todays_deaths": todays_deaths,
    }

    ###################################################################
    # Sanity Check
    ###################################################################
    if ((int(todays_tested) >= int(tested)) or
            (int(todays_confirmed) >= int(confirmed))):
        # not checking for todays_deaths >= deaths
        raise DataValidationError("stats.py numbers doesn't make sense")
    if (int(confirmed) > int(tested)) or (int(deaths) > int(confirmed)):
        raise DataValidationError("stats.py numbers doesnt make sense")
    # NOTE(review): `df`/`data` are only defined on the happy path; the
    # all-zero fallback raises in the sanity check above before reaching this
    # `del`, which is the only reason it cannot NameError — confirm intended.
    del df, data
    gc.collect()
    return stats