def read_metadata():
    ''' Reads the country and region metadata file '''
    # Explicit dtypes: every column is kept as text except the report
    # offset and Population (pandas' nullable Int64, so blanks survive
    # without forcing the column to float).
    dtypes = dict(
        Date=str,
        CountryCode=str,
        CountryName=str,
        RegionCode=str,
        RegionName=str,
        _RegionLabel=str,
        _ReportOffsetDays=int,
        Latitude=str,
        Longitude=str,
        Population='Int64',
    )
    # Only the empty string counts as NA, so literal values such as the
    # country code "NA" are preserved.
    meta = read_file(
        ROOT / 'input' / 'metadata.csv',
        dtype=dtypes,
        keep_default_na=False,
        na_values=[''],
    )
    # Make sure that all entries have a valid region label column
    meta['_RegionLabel'] = meta.apply(_infer_region_label, axis=1)
    return meta
def read_metadata():
    ''' Reads the country and region metadata file '''
    # All columns are read as strings except the numeric ones; Population
    # uses pandas' nullable Int64 so empty cells do not coerce it to float.
    column_dtypes = {
        'Date': str,
        'CountryCode': str,
        'CountryName': str,
        'RegionCode': str,
        'RegionName': str,
        '_RegionLabel': str,
        '_ReportOffsetDays': int,
        'Latitude': str,
        'Longitude': str,
        'Population': 'Int64',
    }
    # Treat only the empty string as NA so values like the literal country
    # code "NA" are not misread as missing.
    return read_file(
        ROOT / 'input' / 'metadata.csv',
        dtype=column_dtypes,
        keep_default_na=False,
        na_values=[''])
# FR_GES,FR,France,GES,Grand Est,GES,48.699800,6.187800, # FR_GF,FR,France,GF,French Guiana,GF,3.933900,-53.125800, # FR_GUA,FR,France,GUA,Guadeloupe,GUA,16.265000,-61.551000, # FR_HDF,FR,France,HDF,Hauts-de-France,HDF,50.480100,2.793700, # FR_IDF,FR,France,IDF,Île-de-France,IDF,48.849900,2.637000, # FR_LRE,FR,France,LRE,La Réunion,LRE,-21.115100,55.536400, # FR_MAY,FR,France,MAY,Mayotte,MAY,-12.827500,45.166200, # FR_MQ,FR,France,MQ,Martinique,MQ,14.641500,-61.024200, # FR_NAQ,FR,France,NAQ,Nouvelle-Aquitaine,NAQ,45.708700,0.626900, # FR_NOR,FR,France,NOR,Normandy,NOR,48.879900,0.171300, # FR_OCC,FR,France,OCC,Occitanie,OCC,43.892700,3.282800, # FR_PAC,FR,France,PAC,Provence-Alpes-Côte d'Azur,PAC,43.935200,6.067900, # FR_PDL,FR,France,PDL,Pays de la Loire,PDL,47.763300,-0.330000, # Read the ISO mappings for department -> region iso = read_file(sys.argv[1], table_index=2, header=True) region_column = [col for col in iso.columns if 'region' in col.lower()][0] dep_map = {idx[3:]: code for idx, code in zip(iso['Code'], iso[region_column])} # Add a few extra departments not in agreement with Wikipedia dep_map['971'] = 'GUA' dep_map['972'] = 'MQ' dep_map['973'] = 'GF' dep_map['974'] = 'LRE' dep_map['976'] = 'MAY' # Read the data from data.gouv.fr confirmed = read_file(sys.argv[2], sep=';').rename( columns={ 'jour': 'Date', 'dep': 'RegionCode',
#!/usr/bin/env python
import sys
from datetime import datetime

from pandas import DataFrame

from covid_io import read_file
from utils import cumsum, dataframe_output


# Read the ISO mappings for department -> region.
# Usage: argv[1] = ISO region mapping table, argv[2] = data.gouv.fr CSV.
iso = read_file(sys.argv[1], table_index=2, header=True)
# Pick the first column whose header mentions 'region' (case-insensitive)
region_column = [col for col in iso.columns if 'region' in col.lower()][0]
# idx[3:] strips the 'FR_' country prefix so keys match bare department codes
dep_map = {idx[3:]: code for idx, code in zip(iso['Code'], iso[region_column])}

# Add the overseas departments, which the ISO table does not map in a way
# that agrees with Wikipedia; without these entries their rows would map to
# None and be silently dropped by the dropna() below
dep_map['971'] = 'GUA'
dep_map['972'] = 'MQ'
dep_map['973'] = 'GF'
dep_map['974'] = 'LRE'
dep_map['976'] = 'MAY'

# Read the data from data.gouv.fr
data = read_file(sys.argv[2], sep=';').rename(
    columns={
        'jour': 'Date',
        'dep': 'RegionName',
        'incid_dc': 'Deaths',
        'incid_rea': 'Critical',
    })

# Map the department to the region; departments missing from dep_map become
# NA and are removed by the dropna() below
data['RegionName'] = data['RegionName'].map(dep_map)

# Estimate confirmed cases from the critical ones
# (assumes roughly 7.5% of confirmed cases become critical — TODO confirm
# this ratio against the data source)
data['Confirmed'] = data['Critical'] / .075

# Data is new cases, perform the cumsum to get total
keys = ['RegionName', 'Date']
data = cumsum(data.dropna(subset=keys), keys)
# Scraper tuning flags; defaults match the most common wikitable layout.
# NOTE(review): `parser` and the 'html-file' / '--locale' arguments are
# defined earlier in the file, outside this chunk.
parser.add_argument('--skiprows', type=int, default=1)
parser.add_argument('--skipcols', type=int, default=2)
parser.add_argument('--droprows', type=str, default=None)
parser.add_argument('--date-format', type=str, default='%b %d')
parser.add_argument('--table-index', type=int, default=0)
parser.add_argument('--null-deaths', action='store_true')
parser.add_argument('--debug', action='store_true')
args = parser.parse_args(sys.argv[1:])

# We need to set locale in order to parse dates properly
locale.setlocale(locale.LC_TIME, args.locale)

# The attribute name contains a dash, so it cannot be accessed as
# args.html_file — presumably the argument was registered with a literal
# 'html-file' dest; verify against the parser setup above
data = read_file(
    getattr(args, 'html-file'),
    header=True,
    selector='table.wikitable',
    parser=wiki_html_cell_parser,
    table_index=args.table_index,
    skiprows=args.skiprows)

if args.debug:
    print('Data:')
    print(data.head(50))

# Some of the tables are in Spanish
data = data.rename(columns={'Fecha': 'Date'})

# Set date column as index: first column named 'date' (case-insensitive,
# tolerating None column names), falling back to the first column
columns_lowercase = [(col or '').lower() for col in data.columns]
date_index = columns_lowercase.index('date') if 'date' in columns_lowercase else 0
data = data.set_index(data.columns[date_index])