#!/usr/bin/env python

from datetime import datetime

from covid_io import read_argv
from utils import datetime_isoformat, pivot_table, dataframe_output


def parse_date(date):
    """Convert a year-less date label into whatever `datetime_isoformat` returns.

    The current calendar year is appended to `date` before parsing with the
    '%d-%b-%Y' format, so labels are always interpreted as belonging to the
    year in which the script runs.
    """
    current_year = datetime.now().year
    dated_label = '%s-%d' % (date, current_year)
    return datetime_isoformat(dated_label, '%d-%b-%Y')


# Read data from Google Sheets
sheet = read_argv()

# The first row of the sheet carries the real header; promote it to column
# labels and treat the 'Provinsi' column as the date index.
sheet.columns = sheet.iloc[0]
sheet = sheet.rename(columns={'Provinsi': 'Date'})
body = sheet.iloc[1:].set_index('Date')

# Keep only the columns that actually have a label
body = body[body.columns.dropna()]

# Reshape the transposed table into long form; the helper is expected to
# yield 'Date', 'RegionName' and 'Value' columns (the code below relies on it)
df = pivot_table(body.T, pivot_name='RegionName')
df['Date'] = df['Date'].apply(parse_date)

# Discard rows without a date, then label the values as confirmed cases
df = df.dropna(subset=['Date']).rename(columns={'Value': 'Confirmed'})

# This source has no death counts; the column is added empty
df['Deaths'] = None
df = df.dropna(how='all', subset=['Confirmed', 'Deaths'])

# Output the results
dataframe_output(df, 'ID')
import os
import sys
import datetime
from pathlib import Path

import pandas as pd

from utils import github_raw_dataframe, dataframe_output, pivot_table, ROOT


def _to_long(frame, value_name):
    """Pivot `frame` by region code and rename the 'Value' column to `value_name`."""
    pivoted = pivot_table(frame, pivot_name='RegionCode')
    return pivoted.rename(columns={'Value': value_name})


# Download the per-state table and index it by date ('Fecha' in the source)
source = github_raw_dataframe(
    'carranco-sga/Mexico-COVID-19', 'Mexico_COVID19.csv')
source = source.rename(columns={'Fecha': 'Date'}).set_index('Date')

# Death columns carry a '_D' suffix; stripping it gives the name of the
# matching confirmed-cases column
deaths_columns = [name for name in source.columns if name.endswith('_D')]
confirmed_columns = [name[:-2] for name in deaths_columns]

deaths = source[deaths_columns]
confirmed = source[confirmed_columns]

# Give both tables the same column labels so they pivot to the same codes
deaths.columns = confirmed.columns

deaths = _to_long(deaths, 'Deaths')
confirmed = _to_long(confirmed, 'Confirmed')

# Join on the columns the two tables share and order the result
df = confirmed.merge(deaths)
df = df.sort_values(['Date', 'RegionCode'])

# Output the results
dataframe_output(df, 'MX')
#!/usr/bin/env python

from datetime import datetime

from pandas import DataFrame, NA

from covid_io import read_argv
from utils import dataframe_output, pivot_table


def _iso_date(raw):
    """Turn a YYYYMMDD value (number or string) into a 'YYYY-MM-DD' string."""
    parsed = datetime.strptime(str(raw), '%Y%m%d')
    return parsed.date().isoformat()


# Get CSV file from Github
# https://raw.github.com/swsoyee/2019-ncov-japan/master/Data/byDate.csv
df = read_argv().rename(columns={'date': 'Date'})
df['Date'] = df['Date'].apply(_iso_date)

# Accumulate the per-date values into running totals
totals = df.set_index('Date').cumsum()

# Reshape to long form and label the values as confirmed cases
df = pivot_table(totals, pivot_name='RegionName')
df = df.rename(columns={'Value': 'Confirmed'})

# No death counts are derived here; the column is added empty
df['Deaths'] = None
df = df.dropna(how='all', subset=['Confirmed', 'Deaths'])

# Output the results
dataframe_output(df, 'JP')
# Positions of every column named 'date' except the first one; those repeats
# are removed so a single date column remains.
del_index = [
    i for i, col in enumerate(columns_lowercase) if col == 'date'
][1:]
data = data.iloc[:, [
    i for i, _ in enumerate(data.columns) if i not in del_index
]]
data = data.set_index(data.columns[date_index])

# Trim trailing columns only when a count was actually given: slicing with
# `:-0` is the same as `:0` and would silently discard every column.
if args.skipcols:
    data = data.iloc[:, :-args.skipcols]

if args.droprows is not None:
    # Comma-separated index labels to remove. Dropping is best effort: when
    # any label is absent pandas raises KeyError and the table is left as is.
    droprows = args.droprows.split(',')
    try:
        data = data.drop(droprows)
    except KeyError as exc:
        if args.debug:
            print('\n[%d] Rows not dropped: %s' % (table_index + 1, exc))

# Pivot the table to fit our preferred format
data = pivot_table(data, pivot_name='RegionName')
data = data[~data['RegionName'].isna()]

if args.debug:
    print('\n[%d] Pivoted:' % (table_index + 1))
    print(data.head(50))

# Make sure all dates include year. When the format has no year directive,
# the current year is appended to both the format and the values.
date_format = args.date_format
if '%Y' not in date_format:
    date_format = date_format + '-%Y'
    data['Date'] = data['Date'].astype(str) + '-%d' % datetime.now().year

# Parse into datetime object, drop if not possible
data['Date'] = data['Date'].apply(
    lambda date: safe_datetime_parse(date, date_format))
# Fetch the table from the Wikipedia article
url_article = 'https://es.wikipedia.org/wiki/Pandemia_de_enfermedad_por_coronavirus_de_2020_en_Per%C3%BA'
data = read_html(
    url_article,
    header=True,
    selector='table.wikitable',
    parser=wiki_html_cell_parser,
    table_index=2,
)
data = data.rename(columns={'Fecha': 'Date'}).set_index('Date')

# Discard the last row and the last three columns
# NOTE(review): presumably totals/summary cells -- confirm against the article
data = data.iloc[:-1, :-3]

# Some poorly maintained tables have duplicate dates, pick the first row in such case
repeated_dates = data.index.duplicated(keep='first')
data = data.loc[~repeated_dates]

# Pivot the table to fit our preferred format
df = pivot_table(data, pivot_name='RegionCode')
df = df[df['RegionCode'].notna()]

# Make sure all dates include year: the format below has no year directive,
# so the current year is appended to both the format and the values
date_format = '%d %B'
if '%Y' not in date_format:
    date_format += '-%Y'
    year_suffix = '-%d' % datetime.now().year
    df['Date'] = df['Date'] + year_suffix

# Parse into datetime object, drop if not possible
df['Date'] = df['Date'].apply(
    lambda text: safe_datetime_parse(text, date_format))
df = df[df['Date'].notna()]

# Convert all dates to ISO format
df['Date'] = df['Date'].apply(lambda parsed: parsed.date().isoformat())