# Pivot the table to fit our preferred format
df = pivot_table(data, pivot_name='RegionName')
# Drop the rows whose RegionName is missing
df = df[~df['RegionName'].isna()]

if args.debug:
    print('Data Frame:')
    print(df.head(50))

# Make sure all dates include year
date_format = args.date_format
if '%Y' not in date_format:
    date_format = date_format + '-%Y'
    # Cast to str before concatenating: the date labels produced by the pivot
    # are not guaranteed to be strings, and adding a str to a non-str column
    # raises a TypeError.
    # NOTE(review): a missing date becomes the text 'nan-<year>' here; this
    # assumes safe_datetime_parse yields a null value for unparseable text so
    # that the row is dropped below -- confirm against the helper.
    df['Date'] = df['Date'].astype(str) + '-%d' % datetime.now().year

# Parse into datetime object, drop if not possible
df['Date'] = df['Date'].apply(
    lambda date: safe_datetime_parse(date, date_format))
df = df[~df['Date'].isna()]

# Convert all dates to ISO format
df['Date'] = df['Date'].apply(lambda date: date.date().isoformat())


def parenthesis(x):
    """Split a cell value into its plain part and its parenthesized digits.

    Args:
        x: Text of a table cell, e.g. ``'12(3)'``.

    Returns:
        A tuple ``(text, digits)``. ``text`` is ``x`` with every ``(<digits>)``
        group removed. ``digits`` is the digit string of the first such group,
        or ``None`` when ``x`` contains no such group.
    """
    regexp = r'\((\d+)\)'
    match = re.search(regexp, x)
    return re.sub(regexp, '', x), match.group(1) if match else None


# Get the confirmed and deaths data from the table.
# Each cell is split only once; the two parts feed the two columns.
value_parts = df['Value'].apply(parenthesis)
df['Confirmed'] = value_parts.apply(lambda parts: safe_int_cast(parts[0]))
df['Deaths'] = value_parts.apply(lambda parts: safe_int_cast(parts[1]))
# NOTE(review): this fragment has lost its line breaks and indentation; the
# statements below are kept exactly as found. Read in order, they:
#   1. pivot `data` with pivot_name='RegionName' and keep only the rows whose
#      RegionName is not null;
#   2. when args.debug is set, print a "[<table_index + 1>] Pivoted:" header
#      and the first 50 rows;
#   3. if args.date_format has no '%Y', append '-%Y' to the format and append
#      '-<current year>' to the Date column (cast to str first);
#   4. parse Date through safe_datetime_parse and keep only the rows whose
#      parsed Date is not null;
#   5. `break` once more than 10 rows and more than 3 distinct RegionName
#      values remain -- the enclosing loop header is not visible in this
#      fragment (presumably a loop over candidate tables indexed by
#      table_index; confirm against the full file);
#   6. convert Date to ISO-format date strings;
#   7. define parenthesis(x), which returns x with every "(<digits>)" group
#      removed, together with the digits of the first such group (None when
#      there is no such group).
data = pivot_table(data, pivot_name='RegionName') data = data[~data['RegionName'].isna()] if args.debug: print('\n[%d] Pivoted:' % (table_index + 1)) print(data.head(50)) # Make sure all dates include year date_format = args.date_format if '%Y' not in date_format: date_format = date_format + '-%Y' data['Date'] = data['Date'].astype(str) + '-%d' % datetime.now().year # Parse into datetime object, drop if not possible data['Date'] = data['Date'].apply( lambda date: safe_datetime_parse(date, date_format)) data = data[~data['Date'].isna()] # If the dataframe is not empty, then we found a good one if len(data) > 10 and len(data['RegionName'].unique()) > 3: break # Convert all dates to ISO format data['Date'] = data['Date'].apply(lambda date: date.date().isoformat()) def parenthesis(x): regexp = r'\((\d+)\)' return re.sub(regexp, '', x), (re.search(regexp, x) or [None, None])[1]
def parse_date(date):
    """Parse a year-less date by appending the current year to it.

    The current year is joined to ``date`` with a dash, and the resulting
    text is handed to ``safe_datetime_parse`` together with the
    ``'%d-%b-%Y'`` format. Whatever that helper returns is passed through
    unchanged.
    """
    current_year = datetime.now().year
    dated_text = '%s-%d' % (date, current_year)
    return safe_datetime_parse(dated_text, '%d-%b-%Y')