Пример #1
0
def read_metadata():
    '''
    Reads the country and region metadata file.

    Loads `input/metadata.csv` (resolved against ROOT) through `read_file`
    with an explicit dtype for every known column, then overwrites the
    `_RegionLabel` column with the per-row result of `_infer_region_label`.

    Returns:
        The table produced by `read_file`, with `_RegionLabel` recomputed.
    '''

    # Latitude and Longitude are requested as text rather than numbers
    column_dtypes = dict(
        Date=str,
        CountryCode=str,
        CountryName=str,
        RegionCode=str,
        RegionName=str,
        _RegionLabel=str,
        _ReportOffsetDays=int,
        Latitude=str,
        Longitude=str,
        Population='Int64',
    )

    # NOTE(review): presumably these options are forwarded to pandas so that
    # only empty cells count as missing values -- confirm against read_file
    metadata_path = ROOT / 'input' / 'metadata.csv'
    metadata = read_file(
        metadata_path,
        dtype=column_dtypes,
        keep_default_na=False,
        na_values=[''],
    )

    # Make sure that all entries have a valid region label column
    region_labels = metadata.apply(_infer_region_label, axis=1)
    metadata['_RegionLabel'] = region_labels

    return metadata
Пример #2
0
def read_metadata():
    '''
    Reads the country and region metadata file.

    Loads `input/metadata.csv` (resolved against ROOT) through `read_file`
    with an explicit dtype for every known column.

    Returns:
        The table produced by `read_file`, unmodified.
    '''

    # Latitude and Longitude are requested as text rather than numbers
    column_dtypes = dict(
        Date=str,
        CountryCode=str,
        CountryName=str,
        RegionCode=str,
        RegionName=str,
        _RegionLabel=str,
        _ReportOffsetDays=int,
        Latitude=str,
        Longitude=str,
        Population='Int64',
    )

    # NOTE(review): presumably these options are forwarded to pandas so that
    # only empty cells count as missing values -- confirm against read_file
    metadata_path = ROOT / 'input' / 'metadata.csv'
    return read_file(
        metadata_path,
        dtype=column_dtypes,
        keep_default_na=False,
        na_values=[''],
    )
Пример #3
0
# FR_GES,FR,France,GES,Grand Est,GES,48.699800,6.187800,
# FR_GF,FR,France,GF,French Guiana,GF,3.933900,-53.125800,
# FR_GUA,FR,France,GUA,Guadeloupe,GUA,16.265000,-61.551000,
# FR_HDF,FR,France,HDF,Hauts-de-France,HDF,50.480100,2.793700,
# FR_IDF,FR,France,IDF,Île-de-France,IDF,48.849900,2.637000,
# FR_LRE,FR,France,LRE,La Réunion,LRE,-21.115100,55.536400,
# FR_MAY,FR,France,MAY,Mayotte,MAY,-12.827500,45.166200,
# FR_MQ,FR,France,MQ,Martinique,MQ,14.641500,-61.024200,
# FR_NAQ,FR,France,NAQ,Nouvelle-Aquitaine,NAQ,45.708700,0.626900,
# FR_NOR,FR,France,NOR,Normandy,NOR,48.879900,0.171300,
# FR_OCC,FR,France,OCC,Occitanie,OCC,43.892700,3.282800,
# FR_PAC,FR,France,PAC,Provence-Alpes-Côte d'Azur,PAC,43.935200,6.067900,
# FR_PDL,FR,France,PDL,Pays de la Loire,PDL,47.763300,-0.330000,

# Load the ISO table used to translate a department into its region
iso = read_file(sys.argv[1], table_index=2, header=True)

# The region column is the first one whose header mentions "region"
region_column = [name for name in iso.columns if 'region' in name.lower()][0]

# Keys are the values of the "Code" column minus their first three characters
dep_map = dict(zip((code[3:] for code in iso['Code']), iso[region_column]))

# Add a few extra departments not in agreement with Wikipedia
dep_map.update({
    '971': 'GUA',
    '972': 'MQ',
    '973': 'GF',
    '974': 'LRE',
    '976': 'MAY',
})

# Read the data from data.gouv.fr
confirmed = read_file(sys.argv[2], sep=';').rename(
    columns={
        'jour': 'Date',
        'dep': 'RegionCode',
Пример #4
0
#!/usr/bin/env python

import sys
from datetime import datetime
from pandas import DataFrame
from covid_io import read_file
from utils import cumsum, dataframe_output

# Load the ISO table used to translate a department into its region
iso = read_file(sys.argv[1], table_index=2, header=True)

# The region column is the first one whose header mentions "region"
region_column = [name for name in iso.columns if 'region' in name.lower()][0]

# Keys are the values of the "Code" column minus their first three characters
dep_map = dict(zip((code[3:] for code in iso['Code']), iso[region_column]))

# Load the data.gouv.fr export (semicolon-separated) and normalize its headers
data = read_file(sys.argv[2], sep=';')
data = data.rename(columns={
    'jour': 'Date',
    'dep': 'RegionName',
    'incid_dc': 'Deaths',
    'incid_rea': 'Critical',
})

# Swap each department for its region; unknown departments become None
data['RegionName'] = data['RegionName'].apply(dep_map.get)

# Estimate confirmed cases from the critical ones (critical count / 0.075)
data['Confirmed'] = data['Critical'].apply(lambda critical: critical / .075)

# Data is new cases, perform the cumsum to get total; rows lacking a region
# or a date are discarded first
keys = ['RegionName', 'Date']
data = data.dropna(subset=keys)
data = cumsum(data, keys)
Пример #5
0
# Options that take a value: (flag, type, default)
for option, option_type, option_default in (
        ('--skiprows', int, 1),
        ('--skipcols', int, 2),
        ('--droprows', str, None),
        ('--date-format', str, '%b %d'),
        ('--table-index', int, 0)):
    parser.add_argument(option, type=option_type, default=option_default)

# Boolean switches
for switch in ('--null-deaths', '--debug'):
    parser.add_argument(switch, action='store_true')

args = parser.parse_args(sys.argv[1:])

# We need to set locale in order to parse dates properly
# NOTE(review): `args.locale` is not declared in this excerpt -- presumably
# registered on the parser earlier in the file
locale.setlocale(locale.LC_TIME, args.locale)

# The attribute name contains a dash, so it is only reachable through getattr
html_source = getattr(args, 'html-file')
data = read_file(
    html_source,
    header=True,
    selector='table.wikitable',
    parser=wiki_html_cell_parser,
    table_index=args.table_index,
    skiprows=args.skiprows)

if args.debug:
    print('Data:', data.head(50), sep='\n')

# Some of the tables are in Spanish
data = data.rename(columns={'Fecha': 'Date'})

# Set date column as index: use the column named "date" (case-insensitive),
# falling back to the first column when there is none
columns_lowercase = [(col or '').lower() for col in data.columns]
try:
    date_index = columns_lowercase.index('date')
except ValueError:
    date_index = 0
data = data.set_index(data.columns[date_index])