Пример #1
0
#!/usr/bin/env python

from datetime import datetime
from datetime import datetime
from covid_io import read_argv
from utils import datetime_isoformat, pivot_table, dataframe_output


def parse_date(date, year=None):
    """Complete a year-less date string and convert it via datetime_isoformat.

    Args:
        date: Date text in ``%d-%b`` form (day and abbreviated month name).
        year: Integer year appended to ``date`` before parsing. When omitted,
            the current year at call time is used, which matches the
            previous behaviour of this function.

    Returns:
        The result of ``datetime_isoformat`` for the completed string parsed
        with the ``%d-%b-%Y`` format.
    """
    # Resolve the default lazily so that callers re-processing data from a
    # previous year can pin the year explicitly instead of silently getting
    # today's year.
    if year is None:
        year = datetime.now().year
    return datetime_isoformat('%s-%d' % (date, year), '%d-%b-%Y')


# Load the source data (noted by the author as coming from Google Sheets)
sheet = read_argv()

# Promote the first row to column headers, relabel 'Provinsi' as 'Date',
# then drop that header row and index the remaining rows by 'Date'
sheet.columns = sheet.iloc[0]
sheet = sheet.rename(columns={'Provinsi': 'Date'}).iloc[1:].set_index('Date')

# Keep only the columns whose header is not null, then pivot the transposed
# table
labelled_columns = sheet.columns.dropna()
df = pivot_table(sheet[labelled_columns].transpose(), pivot_name='RegionName')

# Convert dates, discard rows whose date is null, and name the value column
df['Date'] = df['Date'].apply(parse_date)
df = df.dropna(subset=['Date']).rename(columns={'Value': 'Confirmed'})

# No deaths data in this source; the column is added empty and rows lacking
# both metrics are removed
df['Deaths'] = None
df = df.dropna(how='all', subset=['Confirmed', 'Deaths'])

# Emit the final table
dataframe_output(df, 'ID')
Пример #2
0
import os
import sys
import datetime
from pathlib import Path
import pandas as pd
from utils import github_raw_dataframe, dataframe_output, pivot_table, ROOT

# Fetch the CSV from the GitHub repository and index it by date
source = github_raw_dataframe(
    'carranco-sga/Mexico-COVID-19', 'Mexico_COVID19.csv')
source = source.rename(columns={'Fecha': 'Date'}).set_index('Date')

# Columns ending in '_D' hold deaths; the confirmed column for the same
# region has the same name with that two-character suffix removed
deaths_columns = []
confirmed_columns = []
for column in source.columns:
    if column.endswith('_D'):
        deaths_columns.append(column)
        confirmed_columns.append(column[:-2])

# Give both tables identical column labels so they pivot to the same codes
confirmed = source[confirmed_columns]
deaths = source[deaths_columns]
deaths.columns = confirmed.columns

confirmed = pivot_table(confirmed, pivot_name='RegionCode')
confirmed = confirmed.rename(columns={'Value': 'Confirmed'})
deaths = pivot_table(deaths, pivot_name='RegionCode')
deaths = deaths.rename(columns={'Value': 'Deaths'})

# Join on the shared columns and order the rows
df = confirmed.merge(deaths)
df = df.sort_values(['Date', 'RegionCode'])

# Emit the final table
dataframe_output(df, 'MX')
Пример #3
0
#!/usr/bin/env python

from datetime import datetime
from pandas import DataFrame, NA
from covid_io import read_argv
from utils import dataframe_output, pivot_table

# Input is expected to be the CSV file from Github:
# https://raw.github.com/swsoyee/2019-ncov-japan/master/Data/byDate.csv
daily = read_argv().rename(columns={'date': 'Date'})

# Dates are stored as compact YYYYMMDD values; turn them into ISO strings
daily['Date'] = daily['Date'].apply(
    lambda raw: datetime.strptime(str(raw), '%Y%m%d').date().isoformat())

# Accumulate every column into a running total, indexed by date
totals = daily.set_index('Date').cumsum()

df = pivot_table(totals, pivot_name='RegionName')
df = df.rename(columns={'Value': 'Confirmed'})

# Deaths are not provided here; add the empty column and drop rows that
# have neither metric
df['Deaths'] = None
df = df.dropna(how='all', subset=['Confirmed', 'Deaths'])

# Emit the final table
dataframe_output(df, 'JP')
Пример #4
0
    # Positions of every column named 'date' except the first occurrence;
    # these duplicates are removed below.
    del_index = [
        i for i, col in enumerate(columns_lowercase) if col == 'date'
    ][1:]
    # Keep every column whose position is not in del_index.
    data = data.iloc[:, [
        i for i, _ in enumerate(data.columns) if i not in del_index
    ]]
    # Use the column at position date_index as the index.
    # NOTE(review): date_index is computed outside this fragment; confirm it
    # is still valid after the duplicate 'date' columns were removed.
    data = data.set_index(data.columns[date_index])
    # Drop the trailing args.skipcols columns.
    # NOTE(review): if args.skipcols is 0 this slice is `[:0]` and selects
    # no columns at all -- confirm the argument is always >= 1.
    data = data.iloc[:, :-args.skipcols]
    if args.droprows is not None:
        # args.droprows is a comma-separated list of index labels to remove.
        try:
            data = data.drop(args.droprows.split(','))
        # NOTE(review): bare except silently swallows every error (including
        # KeyboardInterrupt); presumably only missing labels were meant to be
        # ignored -- consider narrowing.
        except:
            pass

    # Pivot the table to fit our preferred format
    data = pivot_table(data, pivot_name='RegionName')
    # Discard rows whose region name is null.
    data = data[~data['RegionName'].isna()]

    if args.debug:
        # Show the first 50 pivoted rows, labelled with a 1-based table number.
        print('\n[%d] Pivoted:' % (table_index + 1))
        print(data.head(50))

    # Make sure all dates include year
    date_format = args.date_format
    if '%Y' not in date_format:
        # The format has no year: append the current year to both the format
        # and every date value so they still match each other.
        date_format = date_format + '-%Y'
        data['Date'] = data['Date'].astype(str) + '-%d' % datetime.now().year

    # Parse each date with safe_datetime_parse using the (possibly extended)
    # format. NOTE(review): the original comment says unparseable dates are
    # dropped; that step is not visible in this fragment.
    data['Date'] = data['Date'].apply(
        lambda date: safe_datetime_parse(date, date_format))
0
# Download the third matching 'table.wikitable' from the Wikipedia article
url_article = 'https://es.wikipedia.org/wiki/Pandemia_de_enfermedad_por_coronavirus_de_2020_en_Per%C3%BA'
table = read_html(
    url_article,
    header=True,
    selector='table.wikitable',
    parser=wiki_html_cell_parser,
    table_index=2,
)
table = table.rename(columns={'Fecha': 'Date'}).set_index('Date')

# Discard the last row and the three rightmost columns
data = table.iloc[:-1, :-3]

# Poorly maintained tables may repeat a date; only the first row for each
# date is kept
first_occurrence = ~data.index.duplicated(keep='first')
data = data.loc[first_occurrence]

# Reshape into the long format used downstream and drop rows without a code
df = pivot_table(data, pivot_name='RegionCode')
df = df[df['RegionCode'].notna()]

# The table's dates carry no year, so the current year is appended to both
# the values and the parsing format
date_format = '%d %B'
if '%Y' not in date_format:
    current_year = datetime.now().year
    df['Date'] = df['Date'] + '-%d' % current_year
    date_format = date_format + '-%Y'

# Parse the dates and keep only the rows where parsing produced a value
df['Date'] = df['Date'].apply(
    lambda text: safe_datetime_parse(text, date_format))
df = df[df['Date'].notna()]

# Render every date as an ISO formatted string
df['Date'] = df['Date'].apply(lambda parsed: parsed.date().isoformat())