#!/usr/bin/env python
"""Rename the input columns, convert dates to ISO format and output as 'CA'."""

import pandas
import datetime
from covid_io import read_argv
from utils import dataframe_output

# Source column name -> column name used in the output table
COLUMN_NAMES = {
    'date': 'Date',
    'prname': '_RegionLabel',
    'numconf': 'Confirmed',
    'numdeaths': 'Deaths',
    'numtested': 'Tested',
}


def _to_iso_date(value):
    """Turn a 'dd-mm-YYYY' string into an ISO formatted date string."""
    parsed = datetime.datetime.strptime(value, '%d-%m-%Y')
    return parsed.date().isoformat()


# Load the input table through the project's read_argv helper
data = read_argv()

# Give the columns their output names
data = data.rename(columns=COLUMN_NAMES)

# Replace every date with its ISO representation
data['Date'] = data['Date'].apply(_to_iso_date)

# Output the results
dataframe_output(data, 'CA')
#!/usr/bin/env python

import sys
from datetime import datetime, timedelta
from pandas import isna
from covid_io import read_argv
from utils import dataframe_output, merge_previous

# Confirmed cases and deaths arrive as separate tables; a third input holds
# the previously known data
confirmed, deaths, prev_data = read_argv()

# Both tables are renamed the same way except for their 'total' column
_shared_columns = {'fecha': 'Date', 'CCAA': '_RegionLabel'}
confirmed = confirmed.rename(columns={**_shared_columns, 'total': 'Confirmed'})
deaths = deaths.rename(columns={**_shared_columns, 'total': 'Deaths'})

# Join the two tables; with no explicit key, merge uses the columns that
# both tables have in common
df = confirmed.merge(deaths)

# Turn every ISO date string into a date object
df['Date'] = df['Date'].apply(
    lambda value: datetime.fromisoformat(value).date())

# Every record belongs to the same country
df['CountryCode'] = 'ES'

# Convert dates to ISO format
#!/usr/bin/env python
"""
This script takes the latest data published by the covidtracking.com project
and extracts the confirmed cases, deaths and total tests for each state.
Credit to the covidtracking.com team for scraping the data from each state.
"""

import sys
import datetime
from covid_io import read_argv
from utils import dataframe_output


def _compact_date_to_iso(value):
    """Turn a YYYYMMDD value (number or string) into an ISO date string."""
    parsed = datetime.datetime.strptime(str(value), '%Y%m%d')
    return parsed.date().isoformat()


# Load the input table and give its columns their output names
df = read_argv().rename(columns={
    'date': 'Date',
    'state': 'RegionCode',
    'positive': 'Confirmed',
    'death': 'Deaths',
    'total': 'Tested',
})

# Replace every date with its ISO representation
df['Date'] = df['Date'].apply(_compact_date_to_iso)

# Output the results
#!/usr/bin/env python
"""Reshape the wide confirmed/deaths tables into records and output as 'BR'."""

from datetime import datetime
from pandas import DataFrame
from covid_io import read_argv
from utils import dataframe_output

# Read data from GitHub repo
confirmed, deaths = read_argv()
for df in (confirmed, deaths):
    df.rename(columns={'Unnamed: 1': 'RegionCode'}, inplace=True)
    df.set_index('RegionCode', inplace=True)

# The column headers are 'dd/mm' strings without a year. Resolve each header
# to an ISO date once, up front, instead of once per region: the result only
# depends on the column, and reading the clock a single time keeps the year
# consistent for the whole run.
# NOTE(review): the year is assumed to be the one in which the script runs,
# so data spanning a year boundary would be mis-dated -- confirm with the
# data source.
run_year = str(datetime.now().year)
dated_columns = [
    (col, datetime.strptime(col + '/' + run_year, '%d/%m/%Y').date().isoformat())
    for col in confirmed.columns[1:]
]

# Transform the data from non-tabulated format to record format:
# one record per (region, date) pair
records = []
for region_code in confirmed.index.unique():
    for col, date in dated_columns:
        records.append({
            'Date': date,
            'RegionCode': region_code,
            'Confirmed': confirmed.loc[region_code, col],
            'Deaths': deaths.loc[region_code, col],
        })
df = DataFrame.from_records(records)

# Output the results
dataframe_output(df, 'BR')
#!/usr/bin/env python

import sys
from covid_io import read_argv
from utils import dataframe_output, datetime_isoformat

# Retrieve the CSV files from https://covid19.isciii.es
# Rows without a date are discarded right away
df = read_argv(encoding='ISO-8859-1').rename(columns={
    'FECHA': 'Date',
    'CCAA': 'RegionCode',
    'Fallecidos': 'Deaths',
}).dropna(subset=['Date'])

# Add the country code to all records
df['CountryCode'] = 'ES'

# Confirmed cases are split across 3 columns; a missing count is stored as 0
confirmed_columns = ['CASOS', 'PCR+', 'TestAc+']
for col in confirmed_columns:
    df[col] = df[col].fillna(0)

# Add the three columns up with a vectorized sum rather than a row-wise
# apply: no Python function is called per row, and the result is still a
# Series when the table has no rows
df['Confirmed'] = df[confirmed_columns].sum(axis=1)

# Convert dates to ISO format
df['Date'] = df['Date'].apply(
    lambda date: datetime_isoformat(date, '%d/%m/%Y'))

# Country-wide is the sum of all regions
region_level = df
country_level = df.groupby(['Date', 'CountryCode']).sum().reset_index()
#!/usr/bin/env python

import sys
from datetime import datetime, timedelta
from pandas import isna
from covid_io import read_argv
from utils import dataframe_output, merge_previous

# Retrieve the CSV files from GitHub, together with the previously known data
df, prev_data = read_argv()
df = df.rename(columns={
    'fecha': 'Date',
    'casos_total': 'Confirmed',
    'fallecimientos': 'Deaths',
})

# Parse each date and write it back in ISO format (date part only)
df['Date'] = df['Date'].apply(
    lambda value: datetime.fromisoformat(value).date().isoformat())

# Add the country code to all records
df['CountryCode'] = 'ES'


def filter_function(row):
    """Tell whether `row` is an 'ES' record whose RegionCode is missing."""
    is_spain = row['CountryCode'] == 'ES'
    # RegionCode is only looked at for 'ES' rows
    return is_spain and isna(row['RegionCode'])


# Merge with the prior data
#!/usr/bin/env python
"""Rename the input columns, normalize dates and output the table as 'IT'."""

import sys
from pathlib import Path
from datetime import datetime, timedelta

import pandas
from covid_io import read_argv
from utils import dataframe_output, merge_previous

# Source column name -> column name used in the output table
COLUMN_NAMES = {
    'data': 'Date',
    'totale_casi': 'Confirmed',
    'deceduti': 'Deaths',
    'tamponi': 'Tested',
}


def _iso_date_only(value):
    """Parse an ISO formatted string and return just its date, in ISO format."""
    return datetime.fromisoformat(value).date().isoformat()


df = read_argv().rename(columns=COLUMN_NAMES)

# The region name is copied into the label column; the original is kept
df['_RegionLabel'] = df['denominazione_regione']

# Parse each date and write it back in ISO format
df['Date'] = df['Date'].apply(_iso_date_only)

# Add the country code to all records
df['CountryCode'] = 'IT'

# Output the results
dataframe_output(df, 'IT')
#!/usr/bin/env python
"""Flatten the transposed input table into records and output them as 'AU'."""

from datetime import datetime
from pandas import DataFrame
from covid_io import read_argv
from utils import dataframe_output, merge_previous

# After transposing, each row is indexed by a date-like value and each column
# label is used as the region code
data = read_argv().transpose()

# Values that may follow the confirmed count inside a cell, in this order
OPTIONAL_FIELDS = ('Deaths', 'Recovered', 'Tested')

# Transform the data from non-tabulated format to record format
records = []
for idx, row in data.iterrows():
    for code in data.columns:
        # Each cell is a sequence: confirmed first, then the optional values
        subset = row[code]
        record = {
            'Date': idx.date().isoformat(),
            'RegionCode': code,
            'Confirmed': subset[0],
        }
        # Copy only the optional values that are actually present
        for position, field in enumerate(OPTIONAL_FIELDS, start=1):
            if len(subset) > position:
                record[field] = subset[position]
        records.append(record)
data = DataFrame.from_records(records)

# Output the results
dataframe_output(data, 'AU')