def download(begin, end):
    """Download ES (Spain) daily COVID-19 data.

    When a date window is given, replay the archived province CSV day by
    day; otherwise fetch only the latest snapshot.  Either way the daily
    files are aggregated afterwards.

    :param begin: start date (anything pandas can compare against a
        datetime), or None.
    :param end: end date, inclusive, or None for an open-ended window.
    :raises Exception: if ``end`` is given without ``begin``, or if the
        latest snapshot comes back empty.
    """
    if begin is not None or end is not None:
        logger.info(f"Retrieving data between {begin} and {end}...")
        df = pd.read_csv(PROVINCE_DATA)
        # Parse the date column once; the original recomputed
        # pd.to_datetime(df["fecha"]) for every comparison.
        fechas = pd.to_datetime(df["fecha"])
        if begin is not None and end is None:
            mask = fechas >= begin
        elif begin is not None and end is not None:
            mask = (fechas >= begin) & (fechas <= end)
        else:
            raise Exception("Please specify begin date")
        # .loc[mask, "fecha"] avoids chained indexing.
        dates = pd.to_datetime(df.loc[mask, "fecha"].unique()).sort_values()
        for date in dates:
            logger.info(f"Downloading {date} ...")
            cov_es = SARSCOV2ES(selected_date=date)
            cov_es.workflow(dataframe=df)
            if cov_es.df.empty:
                # Best effort while replaying history: log and continue.
                logger.error("Empty dataframe for ES data")
    else:
        cov_es = SARSCOV2ES()
        cov_es.workflow()
        logger.info(cov_es.df)
        if cov_es.df.empty:
            raise Exception("Empty dataframe for ES data")
    da = DailyAggregator(
        base_folder="dataset", daily_folder=DAILY_FOLDER, country="ES"
    )
    da.workflow()
def download(begin, end, traceback):
    """Download UK daily COVID-19 data for a date window.

    :param begin: start date; when None it is derived as today minus
        ``traceback`` days.
    :param end: end date, inclusive, or None for an open-ended window.
    :param traceback: number of days to look back when ``begin`` is None.
    """
    if begin is None:
        logger.info(f"Trace back {traceback} days")
        begin = pd.to_datetime(
            datetime.date.today() - datetime.timedelta(traceback)
        )
    # ``begin`` is guaranteed non-None here, so the original
    # "Please specify begin date" branch was unreachable and is removed.
    df = get_dataframe(
        nation_url=NATION_ARCHIVE_API, region_url=REGION_ARCHIVE_API
    )
    logger.info(f"Retrieving data between {begin} and {end}...")
    # Parse the date column once instead of per comparison.
    date_col = pd.to_datetime(df["date"])
    if end is None:
        mask = date_col >= begin
    else:
        mask = (date_col >= begin) & (date_col <= end)
    df = df.loc[mask]
    # NOTE: the "latest" API endpoints were abandoned here (see VCS
    # history) because some departments report late, leaving gaps; we
    # always replay the archive endpoints instead.
    dates = pd.to_datetime(df["date"].unique()).sort_values()
    for date in dates:
        logger.info(f"Downloading {date} ...")
        cov_uk = SARSCOV2UK(selected_date=date)
        cov_uk.workflow(dataframe=df)
        if cov_uk.df.empty:
            # Best effort while replaying history: log and continue.
            logger.error("Empty dataframe for UK data")
    da = DailyAggregator(
        base_folder="dataset", daily_folder=DAILY_FOLDER, country="UK"
    )
    da.workflow()
def download(source):
    """Download NO (Norway) data from ``source`` and aggregate dailies.

    :param source: URL of the NO data source.
    """
    logger.info(f"Using url = {source}")
    cov_no = SARSCOV2NO(url=source)
    cov_no.workflow()
    # Use the module logger instead of bare print() calls, consistent
    # with every other country downloader in this project.
    logger.info(cov_no.df)
    da = DailyAggregator(
        base_folder="dataset", daily_folder=DAILY_FOLDER, country="NO"
    )
    da.workflow()
def download(begin, end):
    """Fetch NL (Netherlands) municipal data.

    With a date window, replay the national report day by day; with no
    window, download only the latest day.  Daily files are aggregated
    afterwards, normalising municipality-name variants.
    """
    if begin or end:
        logger.info(f"Retrieving data between {begin} and {end}...")
        df = pd.read_csv(REPORT_URL, sep=";")
        if begin is None:
            raise Exception("Please specify begin date")
        report_dates = pd.to_datetime(df["Date_of_report"])
        if end is None:
            mask = report_dates >= begin
        else:
            mask = (report_dates >= begin) & (report_dates <= end)
        df = df.loc[mask]
        for date in pd.to_datetime(df["Date_of_report"].unique()).sort_values():
            logger.info(f"Downloading {date} ...")
            cov_nl = SARSCOV2NL(selected_date=date)
            cov_nl.workflow(dataframe=df)
            if cov_nl.df.empty:
                logger.error("Empty dataframe for NL data")
    else:
        cov_nl = SARSCOV2NL()
        cov_nl.workflow()
        logger.info(cov_nl.df)
        if cov_nl.df.empty:
            raise Exception("Empty dataframe for NL data")
    # Map spelling variants of (mostly Frisian) municipality names onto
    # their canonical forms before aggregation.
    name_fixes = {
        "Noardeast-Fryslân": "Noardeast-Fryslân",
        "Súdwest-Fryslân": "Súdwest-Fryslân",
        "Súdwest Fryslân": "Súdwest-Fryslân",
        "s-Gravenhage": "'s-Gravenhage",
    }
    da = DailyAggregator(
        base_folder="dataset",
        daily_folder=DAILY_FOLDER,
        country="NL",
        replace=name_fixes,
    )
    da.workflow()
def download(full):
    """Scrape CH (Switzerland) data.

    :param full: truthy to re-download every available date, falsy for
        the most recent day only.
    """
    scrapper = SARSCOV2CH()
    if not full:
        logger.info('Downloading cases for the last day only ...')
        scrapper.workflow()
    else:
        logger.info('Downloading cases for all dates ...')
        scrapper.extract_table()
        for idx in range(len(scrapper.dates)):
            scrapper.date_index = idx  # small hack to avoid refactoring
            scrapper.workflow()
    da = DailyAggregator(
        base_folder="dataset", daily_folder=DAILY_FOLDER, country="CH"
    )
    da.workflow()
def download(full):
    """Download IT (Italy) data; with ``full`` also rebuild the history.

    :param full: truthy to run the full-history workflow first.
    :raises Exception: when the current-day dataframe is empty.
    """
    if full:
        logger.info("Download full data")
        cov_it_full = SARSCOV2ITFULL()
        cov_it_full.workflow()
    # The regular (current) workflow always runs.
    cov_it = SARSCOV2IT()
    cov_it.workflow()
    logger.info(cov_it.df)
    if cov_it.df.empty:
        raise Exception("Empty data for IT data")
    aggregator = DailyAggregator(
        base_folder="dataset", daily_folder=DAILY_FOLDER, country="IT"
    )
    aggregator.workflow()
def download(full):
    """Download BE (Belgium) data.

    :param full: truthy to rebuild the entire history, falsy for the
        last day only.
    """
    if full:
        scrapper = SARSCOV2BE(history=True)
        logger.info('Downloading cases for all dates ...')
        scrapper.full_history()
    else:
        scrapper = SARSCOV2BE()
        logger.info('Downloading cases for the last day only ...')
        scrapper.workflow()
    aggregator = DailyAggregator(
        base_folder="dataset",
        daily_folder=DAILY_FOLDER,
        country="BE",
        fill=False,
    )
    aggregator.workflow()
# NOTE(review): collapsed chunk — the tail of a DE post-processing method
# (fills NaNs, casts deaths to int, normalises the "Schleswig Holstein" and
# "Gesamt" labels) fused with the script entry point.  The method header
# lies outside this view, so the code is left byte-identical; only this
# comment is added.
inplace=True ) self.df.fillna(0, inplace=True) self.df["deaths"] = self.df.deaths.astype(int) self.df.replace( "Schleswig Holstein", "Schleswig-Holstein", inplace=True ) self.df.sort_values(by="cases", inplace=True) self.df.replace("Gesamt", "sum", inplace=True) if __name__ == "__main__": cov_de = SARSCOV2DE() cov_de.workflow() print(cov_de.df) da = DailyAggregator( base_folder="dataset", daily_folder=DAILY_FOLDER, country="DE" ) da.workflow() print("End of Game")
# NOTE(review): collapsed chunk — a loop tail writing a timestamped daily
# CSV (its enclosing method/loop header is outside this view) fused with the
# England script entry point, which first applies a one-off column rename
# ("authority" -> "nuts_3") to existing daily files.  Left byte-identical
# because the missing context makes reconstruction unsafe.
break if not exists: self.df.to_csv( f"{self.daily_folder}/{self.country.lower()}_covid19_{self.date}_{self.hour:0.0f}_{self.minute:02.0f}.csv", index=False) if __name__ == "__main__": column_converter = {"authority": "nuts_3"} daily_files = retrieve_files(ENGLAND_DAILY_FOLDER) daily_files.sort() for file in daily_files: file_path = os.path.join(ENGLAND_DAILY_FOLDER, file) file_transformation = DailyTransformation( file_path=file_path, column_converter=column_converter) file_transformation.workflow() cov_england = SARSCOV2England() cov_england.workflow() da_england = DailyAggregator(base_folder="dataset", daily_folder=ENGLAND_DAILY_FOLDER, country="England") da_england.workflow() print("End of Game")
# NOTE(review): collapsed chunk — interior of the NL script entry point
# (its enclosing block/header is outside this view): commented-out legacy
# file transformation followed by the live workflow and aggregation with
# municipality-name normalisation.  Left byte-identical; presumably sits
# under an `if __name__ == "__main__":` guard — TODO confirm against the
# full file.
# daily_files.sort() # for file in daily_files: # file_path = os.path.join(DAILY_FOLDER, file) # file_transformation = DailyTransformation( # file_path=file_path, # column_converter=column_converter, # drop_rows=drop_rows # ) # file_transformation.workflow() cov_nl = SARSCOV2NL() cov_nl.workflow() print(cov_nl.df) if cov_nl.df.empty: raise Exception("Empty dataframe for NL data") da = DailyAggregator(base_folder="dataset", daily_folder=DAILY_FOLDER, country="NL", replace={ "Noardeast-Fryslân": "Noardeast-Fryslân", "Súdwest-Fryslân": "Súdwest-Fryslân", "Súdwest Fryslân": "Súdwest-Fryslân", "s-Gravenhage": "'s-Gravenhage" }) da.workflow() print("End of Game")
# NOTE(review): collapsed chunk — the tail of an AT method that extracts a
# datetime from scraped page text via regex + dateutil (its header is
# outside this view), plus `post_processing` and the script entry point.
# Left byte-identical because the missing context makes reconstruction
# unsafe.
text = html.unescape(text) dt_from_re = re_dt.findall(text) if not dt_from_re: raise Exception("Did not find datetime from webpage") dt_from_re = dt_from_re[0].replace("/", "") dt_from_re = dateutil.parser.parse(dt_from_re, dayfirst=True) self.dt = dt_from_re def post_processing(self): self.df.sort_values(by="cases", inplace=True) if __name__ == "__main__": cov_at = SARSCOV2AT() cov_at.workflow() print(cov_at.df) da = DailyAggregator( base_folder="dataset", daily_folder=DAILY_FOLDER, country="AT", fill=False ) da.workflow() print("End of Game")
if __name__ == "__main__":
    # Commented-out legacy transformation (one-off nuts_2 -> nuts_3 column
    # rename) kept for reference:
    # column_converter = {"nuts_2": "nuts_3"}
    # daily_files = retrieve_files(SCOTLAND_DAILY_FOLDER)
    # daily_files.sort()
    # for file in daily_files:
    #     file_path = os.path.join(SCOTLAND_DAILY_FOLDER, file)
    #     file_transformation = DailyTransformation(
    #         file_path=file_path,
    #         column_converter=column_converter,
    #     )
    #     file_transformation.workflow()
    scraper = SARSCOV2Scotland()
    scraper.workflow()
    aggregator = DailyAggregator(
        base_folder="dataset",
        daily_folder=SCOTLAND_DAILY_FOLDER,
        country="Scotland",
    )
    aggregator.workflow()
    print("End of Game")
if __name__ == "__main__":
    # Commented-out legacy transformation (one-off "authority" -> "nuts_3"
    # column rename) kept for reference:
    # column_converter = {"authority": "nuts_3"}
    # daily_files = retrieve_files(WALES_DAILY_FOLDER)
    # daily_files.sort()
    # for file in daily_files:
    #     file_path = os.path.join(WALES_DAILY_FOLDER, file)
    #     file_transformation = DailyTransformation(
    #         file_path=file_path,
    #         column_converter=column_converter,
    #     )
    #     file_transformation.workflow()
    scraper = SARSCOV2Wales()
    scraper.workflow()
    logger.info(scraper.df)
    aggregator = DailyAggregator(
        base_folder="dataset",
        daily_folder=WALES_DAILY_FOLDER,
        country="Wales",
    )
    aggregator.workflow()
    print("End of Game")
# NOTE(review): collapsed chunk — the tail of an ECDC method that parses an
# "as of <day month year>" datetime out of the page heading (its header is
# outside this view), plus `add_country_to_df`, `post_processing`, and the
# script entry point.  Left byte-identical because the missing context
# makes reconstruction unsafe.
# re_dt = re.compile(r'as of (.*)') re_dt = re.compile(r'as of\s+(\d+\s\w+\s\d+)') re_dt_res = re_dt.findall(el[0].xpath('.//h1/span/text()')[0]) if not re_dt_res: raise Exception("Could not find datetime on the web page") # self.dt = dateutil.parser.parse(re_dt_res[0], dayfirst=True) # self.dt = datetime.datetime.strptime(re_dt_res[0] + " 1", "week %W %Y %w") self.dt = pd.to_datetime(re_dt_res[0]) def add_country_to_df(self): logger.debug("No need to add country") def post_processing(self): self.df.sort_values(by="cases", inplace=True) if __name__ == "__main__": cov_ecdc = SARSCOV2ECDC() cov_ecdc.workflow() da_ecdc = DailyAggregator(base_folder="dataset", daily_folder=DAILY_FOLDER, country="ECDC") da_ecdc.workflow() print("End of Game")