def computeMunicipalityCases(update_time):
    """Merge the per-municipality CSVs into ``csv/municipality-cases.csv``.

    Reads the confirmed, active, deceased and vaccination-by-municipality
    files (each indexed by ``date``), gives their columns distinguishing
    suffixes/names, left-joins everything onto the confirmed-cases frame and
    writes the result with alphabetically sorted columns.

    ``update_time`` is accepted but not used by this function.
    """
    filename = 'csv/municipality-cases.csv'
    print("Processing", filename)
    old_hash = sha1sum(filename)

    def _load(path):
        return pd.read_csv(path, index_col='date')

    confirmed = _load('csv/municipality-confirmed.csv')
    active = _load('csv/municipality-active.csv')
    deceased = _load('csv/municipality-deceased.csv')
    vaccinated = _load('csv/vaccination-by_municipality.csv')

    confirmed = confirmed.add_suffix('.cases.confirmed.todate')
    active = active.add_suffix('.cases.active')
    deceased = deceased.add_suffix('.deceased.todate')

    def _vaccination_column_name(name):
        # Same three substitutions as before, applied in the same order.
        name = name.replace('vaccination.region', 'region')
        name = name.replace('1st.todate', 'vaccinated.1st.todate')
        return name.replace('2nd.todate', 'vaccinated.2nd.todate')

    # Keep only columns whose name contains 'date', then rename them.
    vaccinated = vaccinated.filter(like='date', axis='columns')
    vaccinated = vaccinated.rename(columns=_vaccination_column_name)

    merged = confirmed.join(active).join(deceased).join(vaccinated)
    merged = merged.sort_index(axis=1)

    merged.to_csv(filename, float_format='%.0f', index_label='date')
    write_timestamp_file(filename=filename, old_hash=old_hash)
def computeVaccination(update_time):
    """Build ``csv/vaccination.csv`` from the administered and delivered CSVs.

    Outer-joins the two inputs on ``date``, derives a running total of
    delivered doses per manufacturer plus an overall total, and writes the
    columns in a fixed order.

    ``update_time`` is accepted but not used by this function.
    """
    filename = 'csv/vaccination.csv'
    print("Processing", filename)
    old_hash = sha1sum(filename)

    administered = pd.read_csv('csv/vaccination-administered.csv', index_col='date')
    delivered = pd.read_csv('csv/vaccination-delivered.csv', index_col='date')
    merged = administered.join(delivered, how='outer')

    manufacturers = ('pfizer', 'moderna', 'az')

    # Running total per manufacturer; zeros are blanked out again afterwards.
    for m in manufacturers:
        daily = merged[f'vaccination.{m}.delivered']
        running = daily.fillna(0).cumsum().replace({0: None})
        merged[f'vaccination.{m}.delivered.todate'] = running.astype('Int64')

    # Overall total: missing manufacturer values count as zero.
    total = merged['vaccination.pfizer.delivered.todate']
    for m in manufacturers[1:]:
        total = total.add(merged[f'vaccination.{m}.delivered.todate'], fill_value=0)
    merged['vaccination.delivered.todate'] = total.astype('Int64')

    # Fixed output column order.
    column_order = [
        'vaccination.administered',
        'vaccination.administered.todate',
        'vaccination.administered2nd',
        'vaccination.administered2nd.todate',
        'vaccination.used.todate',
        'vaccination.delivered.todate',
    ]
    for m in manufacturers:
        column_order += [
            f'vaccination.{m}.delivered',
            f'vaccination.{m}.delivered.todate',
        ]
    merged = merged.reindex(column_order, axis='columns')

    merged.to_csv(filename, float_format='%.0f', line_terminator='\r\n')
    write_timestamp_file(filename=filename, old_hash=old_hash)
def import_nijz_dash_vacc_delivered():
    """Write delivered-vaccine counts per manufacturer to a CSV file.

    Takes ``cepimose.vaccines_supplied_by_manufacturer()``, indexes it by
    ``date``, renames each column to ``vaccination.<name>.delivered``, keeps
    the known manufacturers in a fixed order, passes the frame through
    ``adjust_vacc_delivered`` and writes ``csv/vaccination-delivered.csv``.
    """
    filename = "csv/vaccination-delivered.csv"

    supplied = pd.DataFrame.from_dict(cepimose.vaccines_supplied_by_manufacturer())
    df = supplied.set_index('date').rename(
        columns=lambda m: f'vaccination.{m}.delivered')

    # Only the keys are used here; they fix the output column order.
    manufacturersMap = {
        "pfizer": cepimose.data.Manufacturer.PFIZER,
        "moderna": cepimose.data.Manufacturer.MODERNA,
        "az": cepimose.data.Manufacturer.AZ,
        "janssen": cepimose.data.Manufacturer.JANSSEN,
        "novavax": cepimose.data.Manufacturer.NOVAVAX,
    }
    columns = [f'vaccination.{m}.delivered' for m in manufacturersMap]

    df = adjust_vacc_delivered(df[columns])

    old_hash = sha1sum(filename)
    # Force integer type, blank out zeros and drop rows that end up empty.
    as_int = df.fillna(0).round().astype('Int64')
    cleaned = as_int.replace({0: None}).dropna(thresh=1)
    cleaned.to_csv(filename, date_format="%Y-%m-%d", line_terminator='\r\n')
    write_timestamp_file(filename, old_hash)
def import_sheet(update_time, sheet, range, filename, **kwargs):
    """Export a spreadsheet range to ``filename`` through ``sheet2csv``.

    Creates the target directory if needed, records the file's hash before
    the export, and calls ``write_timestamp_file`` afterwards. Extra keyword
    arguments are forwarded to ``sheet2csv.sheet2csv``. Any exception from the
    export is reported on stdout and re-raised, in which case the timestamp
    file is not written.

    ``update_time`` is accepted but not used by this function.
    """
    print("Processing", filename)

    target_dir = pathlib.Path(os.path.dirname(filename))
    target_dir.mkdir(parents=True, exist_ok=True)

    old_hash = sha1sum(filename)
    try:
        sheet2csv.sheet2csv(id=sheet,
                            range=range,
                            api_key=GOOGLE_API_KEY,
                            filename=filename,
                            **kwargs)
    except Exception:
        print(f"Failed to import {filename}")
        raise
    write_timestamp_file(filename=filename, old_hash=old_hash)
def import_nijz_dash_vacc_by_age():
    """Write cumulative vaccination counts per age group to a CSV file.

    For every age group returned by ``cepimose.vaccinations_by_age_group()``
    the first/second/third dose columns are renamed to
    ``vaccination.age.<group>.<n>.todate`` and outer-joined on ``date`` into
    one frame, which is written to ``csv/vaccination-by_age.csv``. Progress
    and intermediate frames are printed to stdout.
    """
    filename = "csv/vaccination-by_age.csv"

    # Map cepimose age groups to the labels used in the column names.
    ageGroups = {
        cepimose.data.AgeGroup.GROUP_0_11: "0-11",
        cepimose.data.AgeGroup.GROUP_12_17: "12-17",
        cepimose.data.AgeGroup.GROUP_18_24: "18-24",
        cepimose.data.AgeGroup.GROUP_25_29: "25-29",
        cepimose.data.AgeGroup.GROUP_30_34: "30-34",
        cepimose.data.AgeGroup.GROUP_35_39: "35-39",
        cepimose.data.AgeGroup.GROUP_40_44: "40-44",
        cepimose.data.AgeGroup.GROUP_45_49: "45-49",
        cepimose.data.AgeGroup.GROUP_50_54: "50-54",
        cepimose.data.AgeGroup.GROUP_55_59: "55-59",
        cepimose.data.AgeGroup.GROUP_60_64: "60-64",
        cepimose.data.AgeGroup.GROUP_65_69: "65-69",
        cepimose.data.AgeGroup.GROUP_70_74: "70-74",
        cepimose.data.AgeGroup.GROUP_75_79: "75-79",
        cepimose.data.AgeGroup.GROUP_80_84: "80-84",
        cepimose.data.AgeGroup.GROUP_85_89: "85-89",
        cepimose.data.AgeGroup.GROUP_90: "90+",
    }
    doses = (('first', '1st'), ('second', '2nd'), ('third', '3rd'))

    df = pd.DataFrame()
    vByAgeGroups = cepimose.vaccinations_by_age_group()
    for ag in vByAgeGroups:
        label = ageGroups[ag]
        rows = vByAgeGroups[ag]
        print(f"Joining {label} ({ag}): {len(rows)} rows:")

        new_names = {
            f'{ordinal}_dose': f'vaccination.age.{label}.{short}.todate'
            for ordinal, short in doses
        }
        agData = pd.DataFrame.from_dict(rows).set_index('date')
        agData = agData.rename(columns=new_names)
        print(agData)
        print(agData.describe())

        df = df.join(agData, how='outer')

    print(df)
    print(df.describe())

    old_hash = sha1sum(filename)
    # Integer output; zeros are written as empty cells.
    output = df.astype('Int64').replace({0: None})
    output.to_csv(filename, date_format='%Y-%m-%d')
    write_timestamp_file(filename, old_hash)
def computeMunicipalityCases(update_time):
    """Merge the per-municipality case CSVs into ``csv/municipality-cases.csv``.

    Reads the confirmed, active and deceased files (each indexed by
    ``date``), appends a distinguishing suffix to every column, left-joins
    them onto the confirmed-cases frame and writes the result with
    alphabetically sorted columns.

    ``update_time`` is accepted but not used by this function.
    """
    filename = 'csv/municipality-cases.csv'
    print("Processing", filename)
    old_hash = sha1sum(filename)

    sources = (
        ('csv/municipality-confirmed.csv', '.cases.confirmed.todate'),
        ('csv/municipality-active.csv', '.cases.active'),
        ('csv/municipality-deceased.csv', '.deceased.todate'),
    )
    # Read all inputs first, then suffix their columns.
    frames = [pd.read_csv(path, index_col='date') for path, _ in sources]
    confirmed, active, deceased = (
        frame.add_suffix(suffix)
        for frame, (_, suffix) in zip(frames, sources)
    )

    merged = confirmed.join(active).join(deceased).sort_index(axis=1)
    merged.to_csv(filename, float_format='%.0f', index_label='date')
    write_timestamp_file(filename=filename, old_hash=old_hash)
def import_nijz_dash_vacc_administred():
    """Write administered-dose counts (daily and cumulative) to a CSV file.

    Takes the cumulative first/second/third dose series from
    ``cepimose.vaccinations_by_day()``, derives per-day values by
    differencing, and writes both to ``csv/vaccination-administered.csv``
    with zeros left blank.
    """
    filename = "csv/vaccination-administered.csv"

    cumulative_names = {
        'first_dose': 'vaccination.administered.todate',
        'second_dose': 'vaccination.administered2nd.todate',
        'third_dose': 'vaccination.administered3rd.todate',
    }
    # Per-day column name = cumulative name without the '.todate' suffix.
    daily_names = {
        cumulative: cumulative.rsplit('.todate', 1)[0]
        for cumulative in cumulative_names.values()
    }

    raw = pd.DataFrame.from_dict(cepimose.vaccinations_by_day())
    df = raw.set_index('date').rename(columns=cumulative_names)

    # An all-zero dummy row is put in front so that diff() yields a value for
    # the first real day; the dummy row is removed again afterwards.
    dummy_date = datetime.datetime(2020, 12, 26)
    dummy_row = pd.DataFrame({name: 0 for name in daily_names},
                             index=[dummy_date])
    with_dummy = pd.concat([dummy_row, df])
    df_diff = with_dummy.diff().drop(labels=[dummy_date])
    df_diff = df_diff.rename(columns=daily_names).astype('Int64')

    # Combine cumulative and per-day values on the shared index.
    df = pd.merge(df, df_diff, left_index=True, right_index=True)

    # Column order: per-day value followed by its cumulative counterpart.
    ordered = []
    for cumulative, daily in daily_names.items():
        ordered += [daily, cumulative]
    df = df[ordered].astype('Int64')

    old_hash = sha1sum(filename)
    # Zeros become pd.NA so they are written as empty cells.
    df.replace(0, pd.NA).to_csv(filename, date_format='%Y-%m-%d')
    write_timestamp_file(filename, old_hash)
def computeRegionCases(update_time):
    """Merge the per-region case CSVs into ``csv/region-cases.csv``.

    Reads the confirmed, active and deceased region files (each indexed by
    ``date``), expands a marker in every column name so the three sets do
    not collide, drops the column named ``region.<...>`` without a region
    code from each, joins them and writes the result with alphabetically
    sorted columns.

    ``update_time`` is accepted but not used by this function.
    """
    filename = 'csv/region-cases.csv'
    print("Processing", filename)
    old_hash = sha1sum(filename)

    confirmed = pd.read_csv('csv/region-confirmed.csv', index_col='date')
    active = pd.read_csv('csv/region-active.csv', index_col='date')
    deceased = pd.read_csv('csv/region-deceased.csv', index_col='date')

    def _retag(frame, marker, replacement):
        """Rename columns via marker -> replacement, then drop 'region.<replacement>'."""
        renamed = frame.rename(
            columns=lambda name: name.replace(marker, replacement))
        return renamed.drop(columns='region.' + replacement)

    confirmed = _retag(confirmed, 'todate', 'cases.confirmed.todate')
    active = _retag(active, 'active', 'cases.active')
    deceased = _retag(deceased, 'todate', 'deceased.todate')

    merged = confirmed.join(active).join(deceased).sort_index(axis=1)
    merged.to_csv(filename, float_format='%.0f', index_label='date')
    write_timestamp_file(filename=filename, old_hash=old_hash)
def computeCases(update_time):
    """Update ``csv/cases.csv`` from lab-test and patient data.

    Two steps:

    1. LAB: if ``csv/lab-tests.csv`` contains one date that ``cases.csv``
       lacks (a fixed set of early dates is ignored), a row is appended for
       it. The row gets confirmed cases (PCR + HAGT positives), the
       cumulative confirmed count, active cases (14-day rolling sum of
       confirmed) and closed cases (confirmed to date minus active). The
       file is written once at this point.
    2. HOS: ``cases.recovered.todate`` is recomputed for all rows as closed
       cases minus the next row's ``state.deceased.todate`` from
       ``csv/patients.csv``. Columns are put in a fixed order and the file
       is written again.

    ``update_time`` is accepted but not used by this function.

    Raises:
        AssertionError: if more than one new date is found in lab-tests.csv.
    """
    filename = 'csv/cases.csv'
    print("Processing", filename)

    # LAB (9:00): cases.confirmed, cases.confirmed.todate, cases.active, cases.closed
    df_cases = pd.read_csv(filename, index_col='date')
    df_cases_old_hash = sha1sum(filename)
    df_lab_tests = pd.read_csv('csv/lab-tests.csv', index_col='date').replace({None: 0})

    # Early dates present in lab-tests.csv that are irrelevant for cases.csv.
    ignored_dates = {
        '2020-02-02', '2020-02-09', '2020-02-16', '2020-02-23', '2020-02-24',
        '2020-02-25', '2020-02-26', '2020-02-27', '2020-02-28', '2020-02-29',
        '2020-03-01', '2020-03-02', '2020-03-03',
    }
    date_diff = [
        d for d in df_lab_tests.index.difference(df_cases.index)
        if d not in ignored_dates
    ]

    # Explicit raise instead of `assert`, so the check also runs under -O.
    # The exception type is unchanged for callers.
    if len(date_diff) > 1:
        raise AssertionError(
            'The date difference between lab-tests.csv and cases.csv is more than one day.')

    if date_diff:
        # pd.concat replaces DataFrame.append, which is deprecated and was
        # removed in pandas 2.0.
        empty_row = pd.DataFrame(index=date_diff, columns=df_cases.columns)
        df_cases = pd.concat([df_cases, empty_row])
        new_date = date_diff[0]  # the appended row, i.e. position -1

        # Only the new last row is filled in.
        confirmed = (df_lab_tests.at[new_date, 'tests.positive']
                     + df_lab_tests.at[new_date, 'tests.hagt.positive'])
        df_cases.at[new_date, 'cases.confirmed'] = confirmed

        todate_col = df_cases.columns.get_loc('cases.confirmed.todate')
        df_cases.at[new_date, 'cases.confirmed.todate'] = (
            df_cases.iloc[-2, todate_col] + df_cases.at[new_date, 'cases.confirmed'])

        # Active cases: confirmed cases summed over a 14-row window.
        df_cases['cases.active.temp'] = df_cases['cases.confirmed'].rolling(window=14).sum()
        df_cases.at[new_date, 'cases.active'] = df_cases.at[new_date, 'cases.active.temp']
        df_cases.drop('cases.active.temp', axis='columns', inplace=True)

        df_cases.at[new_date, 'cases.closed.todate'] = (
            df_cases.at[new_date, 'cases.confirmed.todate']
            - df_cases.at[new_date, 'cases.active'])

        # TODO use common function for writing CSV
        # Concatenation loses the index name; set it again so the 'date'
        # header is written to the CSV.
        df_cases.index.rename('date', inplace=True)
        df_cases.replace({0: None}).astype('Int64').to_csv(filename, line_terminator='\r\n')
        write_timestamp_file(filename=filename, old_hash=df_cases_old_hash)

    # HOS (10:30): cases.recovered.todate
    df_patients = pd.read_csv('csv/patients.csv', index_col='date')
    # shift(-1) aligns each cases row with the following row's deceased count.
    df_cases['cases.recovered.todate'] = (
        df_cases['cases.closed.todate']
        - df_patients['state.deceased.todate'].shift(-1))

    df_cases = df_cases.reindex([
        'cases.confirmed',
        'cases.confirmed.todate',
        'cases.active',
        'cases.closed.todate',
        'cases.recovered.todate',
        'cases.rh.occupant.confirmed.todate',
        'cases.hs.employee.confirmed.todate',
        'cases.rh.employee.confirmed.todate',
    ], axis='columns')
    df_cases.replace({0: None}).astype('Int64').to_csv(filename, line_terminator='\r\n')
    write_timestamp_file(filename=filename, old_hash=df_cases_old_hash)
def import_nijz_dash_vacc_delivered():
    """Write delivered and used vaccine counts per manufacturer to a CSV file.

    Starts from ``cepimose.vaccines_supplied_by_manufacturer()`` (columns
    renamed to ``vaccination.<name>.delivered``), joins the supplied/used
    series of every known manufacturer, and writes the ``delivered`` and
    ``used.todate`` columns to ``csv/vaccination-delivered.csv``.
    """
    filename = "csv/vaccination-delivered.csv"

    supplied = pd.DataFrame.from_dict(cepimose.vaccines_supplied_by_manufacturer())
    df = supplied.set_index('date').rename(
        columns=lambda m: f'vaccination.{m}.delivered')

    manufacturersMap = {
        "pfizer": cepimose.data.Manufacturer.PFIZER,
        "moderna": cepimose.data.Manufacturer.MODERNA,
        "az": cepimose.data.Manufacturer.AZ,
        "janssen": cepimose.data.Manufacturer.JANSSEN,
    }

    # Add the supplied/used series of each manufacturer.
    manufacturers_supplied_used = cepimose.vaccinations_by_manufacturer_supplied_used()
    columns = []
    for name, manufacturer in manufacturersMap.items():
        per_manufacturer = pd.DataFrame.from_dict(
            manufacturers_supplied_used[manufacturer])
        per_manufacturer = per_manufacturer.rename(columns={
            'supplied': f'vaccination.{name}.delivered.todate',
            'used': f'vaccination.{name}.used.todate',
        }).set_index('date')
        df = df.join(per_manufacturer)
        # '<name>.delivered.todate' is joined above but not written out.
        columns += [
            f'vaccination.{name}.delivered',
            f'vaccination.{name}.used.todate',
        ]

    # Select and order the output columns.
    df = df[columns]

    old_hash = sha1sum(filename)
    # Force integer type and write zeros as empty cells.
    output = df.fillna(0).round().astype('Int64').replace({0: None})
    output.to_csv(filename, date_format="%Y-%m-%d", line_terminator='\r\n')
    write_timestamp_file(filename, old_hash)
def computeVaccination(update_time):
    """Build ``csv/vaccination.csv`` from the individual vaccination CSVs.

    Outer-joins the administered, used-by-manufacturer, delivered and by-age
    files on ``date``, derives a running total of delivered doses per
    manufacturer plus an overall total, and writes the columns in a fixed
    order.

    ``update_time`` is accepted but not used by this function.
    """
    filename = 'csv/vaccination.csv'
    print("Processing", filename)
    old_hash = sha1sum(filename)

    administered = pd.read_csv('csv/vaccination-administered.csv', index_col='date')
    delivered = pd.read_csv('csv/vaccination-delivered.csv', index_col='date')
    used = pd.read_csv('csv/vaccination-used_by_manufacturer.csv', index_col='date')
    by_age = pd.read_csv('csv/vaccination-by_age.csv', index_col='date')

    merged = administered.join(used, how='outer')
    merged = merged.join(delivered, how='outer')
    merged = merged.join(by_age, how='outer')

    manufacturers = ('pfizer', 'moderna', 'az', 'janssen', 'novavax')
    age_groups = (
        '0-11', '12-17', '18-24', '25-29', '30-34', '35-39', '40-44',
        '45-49', '50-54', '55-59', '60-64', '65-69', '70-74', '75-79',
        '80-84', '85-89', '90+',
    )

    # Running total per manufacturer; zeros are blanked out again afterwards.
    for m in manufacturers:
        daily = merged[f'vaccination.{m}.delivered']
        running = daily.fillna(0).cumsum().replace({0: None})
        merged[f'vaccination.{m}.delivered.todate'] = running.astype('Int64')

    # Overall total: missing manufacturer values count as zero.
    total = merged['vaccination.pfizer.delivered.todate']
    for m in ('moderna', 'az', 'janssen'):
        total = total.add(merged[f'vaccination.{m}.delivered.todate'], fill_value=0)
    total = total.astype('Int64')
    total = total.add(merged['vaccination.novavax.delivered.todate'], fill_value=0)
    merged['vaccination.delivered.todate'] = total.astype('Int64')

    # Fixed output column order.
    column_order = []
    for dose in ('', '2nd', '3rd'):
        column_order += [
            f'vaccination.administered{dose}',
            f'vaccination.administered{dose}.todate',
        ]
    column_order.append('vaccination.used.todate')
    column_order += [f'vaccination.{m}.used.todate' for m in manufacturers]
    column_order.append('vaccination.delivered.todate')
    for m in manufacturers:
        column_order += [
            f'vaccination.{m}.delivered',
            f'vaccination.{m}.delivered.todate',
        ]
    for group in age_groups:
        column_order += [
            f'vaccination.age.{group}.{dose}.todate'
            for dose in ('1st', '2nd', '3rd')
        ]
    merged = merged.reindex(column_order, axis='columns')

    merged.to_csv(filename, float_format='%.0f', line_terminator='\r\n')
    write_timestamp_file(filename=filename, old_hash=old_hash)
def import_nijz_dash_labtests():
    """Add or update the latest day in ``csv/lab-tests.csv`` from cepimose.

    The day is taken from ``cepimose.lab_end_timestamp()``. Its performed and
    positive counts are merged over the existing file (new values win), all
    ``.todate`` columns handled here are recomputed as running totals, and
    the file is rewritten. Intermediate frames are printed to stdout.

    If the reported day is older than yesterday, nothing is written and the
    process exits with status 1.

    Returns:
        The value of ``cepimose.lab_cases_confirmed()`` (PCR+HAT positives,
        per the original author's note).
    """
    # Local imports keep this block independent of whether the module does
    # `import datetime` or `from datetime import ...`; other functions in
    # this file use the module-style `datetime.date` / `datetime.datetime`.
    import datetime as dt
    import sys

    filenameByDay = "csv/lab-tests.csv"
    print("Processing", filenameByDay)
    df_existing = pd.read_csv(filenameByDay, index_col='date', parse_dates=['date'])
    print(df_existing)

    d = cepimose.lab_end_timestamp()
    print(f"Adding/updating lab test data for {d.date()}")

    # Same columns as the existing file, all initialised to None.
    day_data = dict.fromkeys(df_existing.tail(1), None)
    day_data['tests.regular.performed'] = cepimose.lab_PCR_tests_performed()
    # PCR+HAT (original author's note on the confirmed-cases figure)
    day_data['tests.regular.positive'] = cepimose.lab_cases_confirmed()
    day_data['tests.hagt.performed'] = cepimose.lab_HAT_tests_performed()
    day_data['tests.performed'] = day_data['tests.regular.performed']
    day_data['tests.positive'] = day_data['tests.regular.positive']

    df_day_data = pd.DataFrame([day_data], index=[d])
    df_day_data.index.name = 'date'
    print(df_day_data)

    # Values for the new day take precedence over what is already on file.
    df_updated = df_day_data.combine_first(df_existing)
    df_updated = df_updated.fillna(0).round().replace({0: None}).astype('Int64')

    # Recalculate the .todate fields as running totals of the daily values.
    for daily_column in (
            'tests.regular.performed',
            'tests.regular.positive',
            'tests.hagt.performed',
            'tests.performed',
            'tests.positive',
    ):
        running = df_updated[daily_column].fillna(0).cumsum().replace({0: None})
        df_updated[f'{daily_column}.todate'] = running.astype('Int64')
    print(df_updated)

    yesterday = dt.date.today() - dt.timedelta(days=1)
    if d.date() < yesterday:
        print(
            f"ABORTING update with too old date {d.date()} ({d}). Now is {dt.datetime.now()}"
        )
        print(
            f"Cases {cepimose.lab_cases_confirmed()}, PCR: {cepimose.lab_PCR_tests_performed()} HAT: {cepimose.lab_HAT_tests_performed()}"
        )
        # sys.exit instead of the site-provided exit() helper, which is
        # meant for interactive sessions and is not always available.
        sys.exit(1)

    old_hash = sha1sum(filenameByDay)
    df_updated.to_csv(filenameByDay, date_format='%Y-%m-%d', line_terminator='\r\n')
    write_timestamp_file(filenameByDay, old_hash)

    # PCR+HAT positive is confirmed cases
    return cepimose.lab_cases_confirmed()
def import_nijz_dash_vacc_by_municipalities():
    """Write per-municipality vaccination counts: latest snapshot and history.

    Matches every row of ``cepimose.vaccinations_by_municipalities_share()``
    to an entry of ``csv/dict-municipality.csv`` by name, stores its
    population, dose counts and shares on that entry, and writes the
    snapshot to ``csv/vaccination-by_municipality-latest.csv``. Today's
    1st/2nd dose counts are then merged into the history file
    ``csv/vaccination-by_municipality.csv`` (which must already exist).

    Raises:
        Exception: if a row matches no municipality, or more than one.
    """
    filename = "csv/vaccination-by_municipality-latest.csv"
    filenameByDay = "csv/vaccination-by_municipality.csv"
    print("Processing", filename)
    print("Processing", filenameByDay)
    municipalities = pd.read_csv("csv/dict-municipality.csv", index_col="id")[[
        "region", "iso_code", "name", "name_alt", "population"
    ]]
    # uppercase for easy matching
    municipalities['name_search'] = municipalities['name'].str.upper()
    municipalities['name_alt'] = municipalities['name_alt'].str.upper()
    municipalities['name_id'] = municipalities.index.str.upper()
    for row in cepimose.vaccinations_by_municipalities_share():
        # Put spaces around hyphens before comparing with the dictionary names.
        nameNormalized = row.name.upper().replace('-', ' - ')
        # Lookup order: name, alternative name, id, then name with the
        # abbreviation "SV. " expanded to "SVETA ".
        mun = municipalities.loc[municipalities['name_search'] == nameNormalized]
        if mun is None or mun.empty:
            mun = municipalities.loc[municipalities['name_alt'] == nameNormalized]
            if mun is None or mun.empty:
                mun = municipalities.loc[municipalities['name_id'] == nameNormalized]
                if mun is None or mun.empty:
                    mun = municipalities.loc[municipalities['name_search'] == nameNormalized.replace("SV. ", "SVETA ")]
                    if mun is None or mun.empty:
                        raise Exception(f'No municipality match: {row.name}')
        if len(mun.index) > 1:
            raise Exception(
                f'{len(mun.index)} municipalities match: {row.name}')
        # Disabled population consistency check:
        # pop=mun.to_records()[0].population
        # if pop != row.population:
        #     # comment this out if it starts to fail to continue scraping until the population is fixed in dict-municipality.csv
        #     raise Exception(f'Population mismatch in {row.name}: {pop} (dict-municipality.csv) != {row.population} (NIJZ)')
        # add new columns:
        munId = mun.to_records()[0].id  # index value ("id") of the single match
        # overwrite the population with the one from NIJZ, could differ from
        # the one in dict-municipality.csv
        municipalities.loc[
            munId, 'population'] = row.population
        municipalities.loc[munId, '1st.todate'] = row.dose1
        municipalities.loc[munId, '1st.share.todate'] = round(row.share1, 5)
        municipalities.loc[munId, '2nd.todate'] = row.dose2
        municipalities.loc[munId, '2nd.share.todate'] = round(row.share2, 5)
    # trim down extra columns (drops the helper columns used for matching)
    municipalities = municipalities[[
        'region', 'iso_code', 'name', 'population', '1st.todate',
        '1st.share.todate', '2nd.todate', '2nd.share.todate'
    ]]
    municipalities['1st.todate'] = municipalities['1st.todate'].astype('Int64')
    municipalities['2nd.todate'] = municipalities['2nd.todate'].astype('Int64')
    # Keep only rows that have at least 4 non-missing values.
    municipalities.dropna(thresh=4, inplace=True)
    print(municipalities)
    old_hash = sha1sum(filename)
    municipalities.to_csv(filename)
    write_timestamp_file(filename, old_hash)
    # daily history: one row for today, one column per municipality and dose
    today_data = {}
    # NOTE(review): the loop variable `id` shadows the builtin of the same name.
    for id, m in municipalities.iterrows():
        fieldPrefix = f'vaccination.region.{m["region"]}.{id}.'
        # today_data[f'{fieldPrefix}population'] = m["population"]
        today_data[f'{fieldPrefix}1st.todate'] = m["1st.todate"]
        # today_data[f'{fieldPrefix}1st.share.todate'] = m["1st.share.todate"]
        today_data[f'{fieldPrefix}2nd.todate'] = m["2nd.todate"]
        # today_data[f'{fieldPrefix}2nd.share.todate'] = m["2nd.share.todate"]
    df_today = pd.DataFrame([today_data], index=[datetime.date.today()])
    df_today.index.name = 'date'
    # print(df_today)
    # uncomment if there's no previous history file:
    # df_today.to_csv(filenameByDay, date_format='%Y-%m-%d')
    # write_timestamp_file(filenameByDay, "")
    df_existing = pd.read_csv(filenameByDay, index_col=0, parse_dates=[0])
    # print(df_existing)
    # Today's values take precedence over existing ones; zeros are written
    # as empty cells.
    df_updated = df_today.combine_first(df_existing).fillna(0).round().replace(
        {
            0: None
        }).astype('Int64')
    print(df_updated)
    old_hash = sha1sum(filenameByDay)
    df_updated.to_csv(filenameByDay, date_format='%Y-%m-%d')
    write_timestamp_file(filenameByDay, old_hash)
def import_nijz_dash_vacc_by_region():
    """Write daily and cumulative vaccination counts per region to a CSV file.

    For every region in the mapping below, the cumulative first/second/third
    dose series from ``cepimose.vaccinations_by_region_by_day()`` is
    differenced to get per-day values. Columns are renamed to
    ``vaccination.region.<code>.<n>[.todate]`` and outer-joined on ``date``
    into one frame, which is written to ``csv/vaccination-by_region.csv``.
    Progress and intermediate frames are printed to stdout.
    """
    filename = "csv/vaccination-by_region.csv"
    print("Processing", filename)

    df = pd.DataFrame()
    vaccByRegion = cepimose.vaccinations_by_region_by_day()

    # Map cepimose regions to sledilnik region codes, preserving previous order.
    regions = {
        cepimose.data.Region.OBALNOKRASKA: "kp",
        cepimose.data.Region.GORISKA: "ng",
        cepimose.data.Region.PRIMORSKONOTRANJSKA: "po",
        cepimose.data.Region.GORENJSKA: "kr",
        cepimose.data.Region.OSREDNJESLOVENSKA: "lj",
        cepimose.data.Region.JUGOVZHODNASLOVENIJA: "nm",
        cepimose.data.Region.POSAVSKA: "kk",
        cepimose.data.Region.ZASAVSKA: "za",
        cepimose.data.Region.SAVINJSKA: "ce",
        cepimose.data.Region.KOROSKA: "sg",
        cepimose.data.Region.PODRAVSKA: "mb",
        cepimose.data.Region.POMURSKA: "ms",
    }
    doses = (('first', '1st'), ('second', '2nd'), ('third', '3rd'))

    # Join all regions.
    for reg, code in regions.items():
        print(f"Joining {code} ({reg}): {len(vaccByRegion[reg])} rows:")

        regData = pd.DataFrame.from_dict(vaccByRegion[reg]).set_index('date')
        for ordinal, _ in doses:
            regData[f"{ordinal}_diff"] = regData[f"{ordinal}_dose"].diff()

        # Per-day value first, then the cumulative value, for each dose.
        ordered = []
        new_names = {}
        for ordinal, short in doses:
            ordered += [f'{ordinal}_diff', f'{ordinal}_dose']
            new_names[f'{ordinal}_diff'] = f'vaccination.region.{code}.{short}'
            new_names[f'{ordinal}_dose'] = f'vaccination.region.{code}.{short}.todate'
        regData = regData[ordered].rename(columns=new_names)

        print(regData)
        print(regData.describe())
        df = df.join(regData, how='outer')

    print(df)
    print(df.describe())

    old_hash = sha1sum(filename)
    # Force integer type and write zeros as empty cells.
    output = df.fillna(0).round().astype('Int64').replace({0: None})
    output.to_csv(filename, date_format="%Y-%m-%d", line_terminator='\r\n')
    write_timestamp_file(filename, old_hash)
def import_nijz_dash_vacc_used_by_manufacturer():
    """Write used vaccine doses per manufacturer (daily and cumulative) to CSV.

    Takes ``cepimose.vaccinations_by_manufacturer_used()``, renames the
    manufacturer columns to ``vaccination.<name>.used``, adds a running
    total per manufacturer and an overall daily/running total, and writes
    ``csv/vaccination-used_by_manufacturer.csv``. A row for today is dropped
    when its overall daily total is missing.
    """
    filename = "csv/vaccination-used_by_manufacturer.csv"
    manufacturers = ('pfizer', 'moderna', 'az', 'janssen', 'novavax')

    def _running_total(series):
        return series.fillna(0).cumsum().replace({0: None}).astype('Int64')

    # Used doses per manufacturer.
    raw = pd.DataFrame.from_dict(cepimose.vaccinations_by_manufacturer_used())
    df = raw.set_index('date').rename(
        columns={m: f'vaccination.{m}.used' for m in manufacturers})
    df = df.astype('Int64')

    for m in manufacturers:
        df[f'vaccination.{m}.used.todate'] = _running_total(df[f'vaccination.{m}.used'])

    # Total used vaccine doses across manufacturers (missing counts as zero).
    daily_total = df['vaccination.pfizer.used'].fillna(0)
    for m in manufacturers[1:]:
        daily_total = daily_total + df[f'vaccination.{m}.used'].fillna(0)
    df['vaccination.used'] = daily_total
    df['vaccination.used.todate'] = _running_total(df['vaccination.used'])
    df = df.astype('Int64')

    # Column order: totals first, then daily/cumulative per manufacturer.
    ordered = ['vaccination.used', 'vaccination.used.todate']
    for m in manufacturers:
        ordered += [f'vaccination.{m}.used', f'vaccination.{m}.used.todate']
    df = df[ordered]
    df = df.replace({0: None}).astype('Int64')

    # Drop today's row when it carries no overall daily total.
    today = datetime.date.today().isoformat()
    if today in df.index and pd.isna(df.loc[today]['vaccination.used']):
        df = df.drop(df.loc[df.index == today].index)

    old_hash = sha1sum(filename)
    # Zeros become pd.NA so they are written as empty cells.
    df.replace(0, pd.NA).to_csv(filename, date_format='%Y-%m-%d')
    write_timestamp_file(filename, old_hash)
def computeStats(update_time):
    """Build ``csv/stats.csv`` by joining the individual statistics CSVs.

    The phase dictionary is expanded to one row per day (up to the last date
    in patients.csv) and used as the base frame; patients, regions, age
    cases, age deceased, retirement-home deceased, lab tests and cases are
    left-joined onto it. A ``cases.unclassified.confirmed.todate`` column is
    derived, a running ``day`` number becomes the index, and the columns are
    written in a fixed order.

    ``update_time`` is accepted but not used by this function.
    """
    filename = 'csv/stats.csv'
    print("Processing", filename)
    old_hash = sha1sum(filename)
    df_patients = pd.read_csv(
        'csv/patients.csv', index_col='date', parse_dates=['date'])[[
            'state.in_hospital', 'state.in_hospital.todate', 'state.icu',
            'state.critical', 'state.out_of_hospital.todate',
            'state.deceased.todate', 'state.recovered.todate'
        ]]
    df_phases = pd.read_csv('csv/dict-phases.csv',
                            index_col='date.from',
                            parse_dates=['date.from'
                                         ]).rename(mapper={'id': 'phase'},
                                                   axis='columns')[['phase']]
    # Forward-fill the phase id so every day from the first phase start to
    # the last patients date has a phase.
    df_phases = df_phases.reindex(pd.date_range(df_phases.index.min(),
                                                df_patients.index.max(),
                                                freq='D'),
                                  method='ffill')
    df_phases.index.name = 'date'
    dfRegions = pd.read_csv('csv/regions-cases.csv', index_col='date')
    dfAgeC = pd.read_csv('csv/age-cases.csv', index_col='date')
    dfAgeD = pd.read_csv('csv/age-deceased.csv', index_col='date')
    dfRhD = pd.read_csv('csv/rh-deceased.csv', index_col='date')
    df_lab_tests = pd.read_csv('csv/lab-tests.csv', index_col='date')[[
        'tests.performed',
        'tests.performed.todate',
        'tests.positive',
        'tests.positive.todate',
        'tests.regular.performed',
        'tests.regular.performed.todate',
        'tests.regular.positive',
        'tests.regular.positive.todate',
        'tests.hagt.performed',
        'tests.hagt.performed.todate',
        'tests.hagt.positive',
        'tests.hagt.positive.todate',
        'tests.ns-apr20.performed',
        'tests.ns-apr20.performed.todate',
        'tests.ns-apr20.positive',
        'tests.ns-apr20.positive.todate',
    ]]
    df_cases = pd.read_csv('csv/cases.csv', index_col='date')[[
        'cases.confirmed',
        'cases.confirmed.todate',
        'cases.active',
        'cases.recovered.todate',
        'cases.closed.todate',
        'cases.hs.employee.confirmed.todate',
        'cases.rh.employee.confirmed.todate',
        'cases.rh.occupant.confirmed.todate',
    ]]
    # Left joins: the daily phase frame determines which dates appear.
    merged = df_phases.join(df_patients).join(dfRegions).join(dfAgeC).join(
        dfAgeD).join(dfRhD).join(df_lab_tests).join(df_cases)
    # Confirmed cases not attributed to any of the three tracked groups;
    # missing group values count as zero.
    merged['cases.unclassified.confirmed.todate'] = merged['cases.confirmed.todate'] \
        .sub(merged['cases.hs.employee.confirmed.todate'], fill_value=0) \
        .sub(merged['cases.rh.employee.confirmed.todate'], fill_value=0) \
        .sub(merged['cases.rh.occupant.confirmed.todate'], fill_value=0)
    # Running day number starting at -8 for the first row; it replaces the
    # date as index, and the date becomes a regular column.
    merged.insert(loc=0, column='day', value=range(-8, -8 + len(merged)))
    merged.reset_index(inplace=True)
    merged.set_index('day', inplace=True)
    merged = merged.reindex(
        [  # sort
            'date', 'phase', 'tests.performed.todate', 'tests.performed',
            'tests.positive.todate', 'tests.positive',
            'tests.regular.performed.todate', 'tests.regular.performed',
            'tests.regular.positive.todate', 'tests.regular.positive',
            'tests.hagt.performed', 'tests.hagt.performed.todate',
            'tests.hagt.positive', 'tests.hagt.positive.todate',
            'tests.ns-apr20.performed.todate', 'tests.ns-apr20.performed',
            'tests.ns-apr20.positive.todate', 'tests.ns-apr20.positive',
            'cases.confirmed.todate', 'cases.confirmed', 'cases.active',
            'cases.recovered.todate', 'cases.closed.todate',
            'cases.hs.employee.confirmed.todate',
            'cases.rh.employee.confirmed.todate',
            'cases.rh.occupant.confirmed.todate',
            'cases.unclassified.confirmed.todate', 'state.in_hospital',
            'state.icu', 'state.critical', 'state.in_hospital.todate',
            'state.out_of_hospital.todate', 'state.deceased.todate',
            'state.recovered.todate', 'region.lj.todate', 'region.ce.todate',
            'region.mb.todate', 'region.ms.todate', 'region.kr.todate',
            'region.nm.todate', 'region.za.todate', 'region.sg.todate',
            'region.po.todate', 'region.ng.todate', 'region.kp.todate',
            'region.kk.todate', 'region.foreign.todate',
            'region.unknown.todate', 'region.todate', 'age.0-4.todate',
            'age.5-14.todate', 'age.15-24.todate', 'age.25-34.todate',
            'age.35-44.todate', 'age.45-54.todate', 'age.55-64.todate',
            'age.65-74.todate', 'age.75-84.todate', 'age.85+.todate',
            'age.todate', 'age.female.0-4.todate', 'age.female.5-14.todate',
            'age.female.15-24.todate', 'age.female.25-34.todate',
            'age.female.35-44.todate', 'age.female.45-54.todate',
            'age.female.55-64.todate', 'age.female.65-74.todate',
            'age.female.75-84.todate', 'age.female.85+.todate',
            'age.female.todate', 'age.male.0-4.todate',
            'age.male.5-14.todate', 'age.male.15-24.todate',
            'age.male.25-34.todate', 'age.male.35-44.todate',
            'age.male.45-54.todate', 'age.male.55-64.todate',
            'age.male.65-74.todate', 'age.male.75-84.todate',
            'age.male.85+.todate', 'age.male.todate',
            'age.unknown.0-4.todate', 'age.unknown.5-14.todate',
            'age.unknown.15-24.todate', 'age.unknown.25-34.todate',
            'age.unknown.35-44.todate', 'age.unknown.45-54.todate',
            'age.unknown.55-64.todate', 'age.unknown.65-74.todate',
            'age.unknown.75-84.todate', 'age.unknown.85+.todate',
            'age.unknown.todate', 'deceased.0-4.todate',
            'deceased.5-14.todate', 'deceased.15-24.todate',
            'deceased.25-34.todate', 'deceased.35-44.todate',
            'deceased.45-54.todate', 'deceased.55-64.todate',
            'deceased.65-74.todate', 'deceased.75-84.todate',
            'deceased.85+.todate', 'deceased.todate',
            'deceased.female.0-4.todate', 'deceased.female.5-14.todate',
            'deceased.female.15-24.todate', 'deceased.female.25-34.todate',
            'deceased.female.35-44.todate', 'deceased.female.45-54.todate',
            'deceased.female.55-64.todate', 'deceased.female.65-74.todate',
            'deceased.female.75-84.todate', 'deceased.female.85+.todate',
            'deceased.female.todate', 'deceased.male.0-4.todate',
            'deceased.male.5-14.todate', 'deceased.male.15-24.todate',
            'deceased.male.25-34.todate', 'deceased.male.35-44.todate',
            'deceased.male.45-54.todate', 'deceased.male.55-64.todate',
            'deceased.male.65-74.todate', 'deceased.male.75-84.todate',
            'deceased.male.85+.todate', 'deceased.male.todate',
            'deceased.rhoccupant.todate', 'deceased.other.todate'
        ],
        axis='columns')
    merged.to_csv(filename, float_format='%.0f', line_terminator='\r\n')
    write_timestamp_file(filename=filename, old_hash=old_hash)
def import_nijz_dash_vacc_by_age():
    """Merge today's vaccination-by-age counts into csv/vaccination-by_age.csv.

    Steps:
      1. Read the existing CSV (first column parsed as the date index).
      2. Fetch per-age-group first/second dose totals from
         ``cepimose.vaccinations_by_age()`` and build a single row for today.
      3. Derive the ``18-64`` and ``65+`` aggregate columns from that row.
      4. Overlay today's row on the existing data (today's non-null values
         win), cast to nullable ``Int64`` and write the file back in a fixed
         column order, then call ``write_timestamp_file``.

    Raises:
        ValueError: if an age group label does not start with an integer
            (e.g. something other than ``"18-24"`` or ``"90+"``).
        KeyError: if an expected output column is present neither in today's
            data nor in the existing file.
    """
    filename = "csv/vaccination-by_age.csv"
    column_template = 'vaccination.age.{}.{}.todate'
    doses = ('1st', '2nd')

    df_existing = pd.read_csv(filename, index_col=0, parse_dates=[0])

    today_data = {}
    for row in cepimose.vaccinations_by_age():
        today_data[column_template.format(row.age_group, '1st')] = row.count_first
        today_data[column_template.format(row.age_group, '2nd')] = row.count_second

    # Index with a Timestamp (not a bare datetime.date) so today's row has the
    # same type as the parsed dates in df_existing; a re-run on the same day
    # then overwrites that row without relying on date/Timestamp coercion.
    today = pd.Timestamp(datetime.date.today())
    df_today = pd.DataFrame([today_data], index=[today])
    df_today.index.name = 'date'

    def start_age(colname: str) -> int:
        # 'vaccination.age.18-24.1st.todate' -> 18, 'vaccination.age.90+...' -> 90
        return int(colname.split('.')[2].split('-')[0].strip('+'))

    def phase(colname: str) -> str:
        # 'vaccination.age.18-24.1st.todate' -> '1st'
        return colname.split('.')[3]

    # Columns to be calculated. Snapshot the fetched columns first so that the
    # aggregates added below never feed into one another.
    fetched_columns = list(df_today.columns)
    aggregates = (
        # The lower bound matters: the 0-17 group must not leak into 18-64.
        ('18-64', lambda age: 18 <= age < 65),
        ('65+', lambda age: age >= 65),
    )
    for label, in_range in aggregates:
        for dose in doses:
            members = [
                col for col in fetched_columns
                if phase(col) == dose and in_range(start_age(col))
            ]
            df_today[column_template.format(label, dose)] = \
                df_today[members].sum(axis=1)

    # Today's values take precedence; older dates come from the existing file.
    # Nullable Int64 keeps gaps empty instead of turning counts into floats.
    df_updated = df_today.combine_first(df_existing).astype('Int64')

    # Fixed output order: detailed groups first, then the two aggregates,
    # each with its 1st dose column followed by its 2nd dose column.
    age_groups = [
        '0-17', '18-24', '25-29', '30-34', '35-39', '40-44', '45-49', '50-54',
        '55-59', '60-64', '65-69', '70-74', '75-79', '80-84', '85-89', '90+',
        '18-64', '65+',
    ]
    col_order = [
        column_template.format(group, dose)
        for group in age_groups
        for dose in doses
    ]
    df_updated = df_updated[col_order]

    # Hash the file before overwriting it so write_timestamp_file receives the
    # pre-update hash.
    old_hash = sha1sum(filename)
    df_updated.to_csv(filename, date_format='%Y-%m-%d')
    write_timestamp_file(filename, old_hash)