def parse_dataframes(
    self, dataframes: List[DataFrame], aux: Dict[str, DataFrame], **parse_opts
) -> DataFrame:
    """Build per-day, per-province records for one country from the first input table.

    The input table is read as a time series of snapshots: for every
    (date, country, province) group only the row with the latest
    ``updateTime`` is kept, and the result is handed to ``grouped_diff``.

    Args:
        dataframes: Input tables; only ``dataframes[0]`` is read. It must
            provide the columns ``updateTime``, ``countryEnglishName``,
            ``provinceEnglishName``, ``province_confirmedCount``,
            ``province_deadCount`` and ``province_curedCount``.
        aux: Auxiliary tables. Not used by this method.
        **parse_opts: Must contain ``country_name``, the value matched
            against the ``countryEnglishName`` column.

    Returns:
        The output of ``grouped_diff`` applied to the columns ``date``,
        ``country_name``, ``match_string``, ``confirmed``, ``deceased`` and
        ``recovered``, keyed on ``country_name``, ``match_string``, ``date``.

    Raises:
        KeyError: If ``country_name`` is missing from ``parse_opts`` or an
            expected column is missing from the input table.
    """
    # Rename the appropriate columns. `rename` returns a new frame, so none of
    # the following steps modify the DataFrame owned by the caller.
    data = dataframes[0].rename(
        columns={
            "countryEnglishName": "country_name",
            "provinceEnglishName": "match_string",
            "province_confirmedCount": "confirmed",
            "province_deadCount": "deceased",
            "province_curedCount": "recovered",
        }
    )

    # Filter specific country data only. Doing this before the date conversion
    # avoids converting timestamps of rows that are discarded anyway.
    data = data[data["country_name"] == parse_opts["country_name"]]

    # Adjust 7 hour difference between China's GMT+8 and GMT+1
    data = data.assign(
        date=data["updateTime"].apply(lambda date: timezone_adjust(date, 7))
    )

    # This is time series data, get only the last snapshot of each day
    data = (
        data.sort_values("updateTime")
        .groupby(["date", "country_name", "match_string"])
        .last()
        .reset_index()
    )

    keep_columns = [
        "date",
        "country_name",
        "match_string",
        "confirmed",
        "deceased",
        "recovered",
    ]
    # NOTE(review): grouped_diff is defined outside this block; presumably it
    # turns the cumulative counts into per-day differences -- confirm.
    return grouped_diff(data[keep_columns], ["country_name", "match_string", "date"])
def parse_dataframes(
    self, dataframes: Dict[str, DataFrame], aux: Dict[str, DataFrame], **parse_opts
) -> DataFrame:
    """Build per-day, per-province cumulative records for one country.

    The input table is read as a time series of snapshots: for every
    (date, country, province) group only the row with the latest
    ``updateTime`` is kept.

    Args:
        dataframes: Input tables; only the entry stored under ``0`` is read.
            It must provide the columns ``updateTime``,
            ``countryEnglishName``, ``provinceEnglishName``,
            ``province_confirmedCount``, ``province_deadCount`` and
            ``province_curedCount``.
        aux: Auxiliary tables. Not used by this method.
        **parse_opts: Must contain ``country_name``, the value matched
            against the ``countryEnglishName`` column.

    Returns:
        A DataFrame with the columns ``key``, ``date``, ``country_name``,
        ``match_string``, ``total_confirmed``, ``total_deceased`` and
        ``total_recovered``. ``key`` is ``None`` except for the rows whose
        ``match_string`` is "Taiwan" ("TW") or "Hong Kong" ("HK").

    Raises:
        KeyError: If ``country_name`` is missing from ``parse_opts`` or an
            expected column is missing from the input table.
    """
    # NOTE(review): the annotation says the mapping is keyed by str, yet the
    # table is looked up with the integer 0 -- confirm the real key type.
    #
    # Rename the appropriate columns. `rename` returns a new frame, so none of
    # the following steps modify the DataFrame owned by the caller.
    data = dataframes[0].rename(
        columns={
            "countryEnglishName": "country_name",
            "provinceEnglishName": "match_string",
            "province_confirmedCount": "total_confirmed",
            "province_deadCount": "total_deceased",
            "province_curedCount": "total_recovered",
        }
    )

    # Filter specific country data only. Doing this before the date conversion
    # avoids converting timestamps of rows that are discarded anyway.
    data = data[data["country_name"] == parse_opts["country_name"]]

    # Adjust 7 hour difference between China's GMT+8 and GMT+1
    data = data.assign(
        date=data["updateTime"].apply(lambda date: timezone_adjust(date, 7))
    )

    # This is time series data, get only the last snapshot of each day
    data = (
        data.sort_values("updateTime")
        .groupby(["date", "country_name", "match_string"])
        .last()
        .reset_index()
    )

    # A couple of regions are reported using conflicting country codes, harmonize them here so
    # we avoid repeated regions
    data["key"] = None
    data.loc[data["match_string"] == "Taiwan", "key"] = "TW"
    data.loc[data["match_string"] == "Hong Kong", "key"] = "HK"

    keep_columns = [
        "key",
        "date",
        "country_name",
        "match_string",
        "total_confirmed",
        "total_deceased",
        "total_recovered",
    ]
    return data[keep_columns]