Пример #1
0
def parse_date(date):
    """Append the current year to ``date`` and reformat the result.

    ``date`` is joined with the year reported by ``datetime.now()`` using a
    dash, and the combined text is passed to ``datetime_isoformat`` together
    with the ``'%d-%b-%Y'`` pattern. Whatever that helper returns is returned
    unchanged.
    """
    current_year = datetime.now().year
    dated_text = '{!s}-{:d}'.format(date, current_year)
    return datetime_isoformat(dated_text, '%d-%b-%Y')
Пример #2
0
#!/usr/bin/env python

import re
import sys
from pandas import read_csv
from utils import ROOT, datetime_isoformat, read_metadata, safe_int_cast

# Download the dataset and discard the columns this script does not use:
# the three named columns plus every '*_Notes' and '*_IsGeneral' column
data = read_csv('https://ocgptweb.azurewebsites.net/CSVDownload')
unused_columns = ['CountryName', 'ConfirmedCases', 'ConfirmedDeaths']
unused_columns += [
    name for name in data.columns
    if name.endswith(('_Notes', '_IsGeneral'))
]
data = data.drop(columns=unused_columns)

# Reformat the '%Y%m%d' date values through datetime_isoformat
data['Date'] = data['Date'].apply(
    lambda raw_date: datetime_isoformat(raw_date, '%Y%m%d'))

# Join with ISO data
iso_path = ROOT / 'input' / 'ISO-3166-2.csv'
iso = read_csv(iso_path)[['3166-2-Alpha-2', '3166-2-Alpha-3']]
data = data.rename(columns={'CountryCode': '3166-2-Alpha-3'})
data = data.merge(iso)

# Join with our metadata
metadata = read_metadata()[['Key', 'CountryCode']]
data = data.rename(columns={'3166-2-Alpha-2': 'CountryCode'})
data = data.merge(metadata)

# Use consistent naming convention for columns: keep only the columns whose
# name contains an underscore (plus three explicitly listed ones), then
# reduce every name to the text after its last underscore
kept_without_underscore = ('Date', 'Key', 'StringencyIndex')
selected_columns = [
    name for name in data.columns
    if '_' in name or name in kept_without_underscore
]
data = data[selected_columns]
data.columns = [name.rsplit('_', 1)[-1] for name in data.columns]
data.columns = [
Пример #3
0
# Retrieve the CSV files from https://covid19.isciii.es
# The input is loaded by read_argv, the relevant columns are renamed, and
# rows without a date are discarded
df = read_argv(encoding='ISO-8859-1').rename(columns={
    'FECHA': 'Date',
    'CCAA': 'RegionCode',
    'Fallecidos': 'Deaths'
}).dropna(subset=['Date'])

# Add the country code to all records
df['CountryCode'] = 'ES'

# Confirmed cases are split across 3 columns; missing values count as zero
confirmed_columns = ['CASOS', 'PCR+', 'TestAc+']
for col in confirmed_columns:
    df[col] = df[col].fillna(0)

# Vectorized row-wise total. This replaces a per-row Python `apply`, which is
# much slower and does not reliably yield a Series when the frame is empty.
df['Confirmed'] = df[confirmed_columns].sum(axis=1)

# Convert dates from '%d/%m/%Y' using datetime_isoformat
df['Date'] = df['Date'].apply(
    lambda date: datetime_isoformat(date, '%d/%m/%Y'))

# Country-wide is the sum of all regions
# NOTE(review): groupby().sum() aggregates every non-key column. On
# pandas >= 2.0 that includes non-numeric columns (e.g. RegionCode, if it
# holds strings) -- confirm dataframe_output ignores such columns.
region_level = df
country_level = df.groupby(['Date', 'CountryCode']).sum().reset_index()

# Output the results
dataframe_output(country_level)
dataframe_output(region_level, 'ES')