def parse_xlsx(): html_url = 'https://www.vd.ch/toutes-les-actualites/hotline-et-informations-sur-le-coronavirus/point-de-situation-statistique-dans-le-canton-de-vaud/' d = sc.download(html_url, silent=True) soup = BeautifulSoup(d, 'html.parser') xls_url = soup.find(href=re.compile("\.xlsx$")).get('href') assert xls_url, "URL is empty" xls = sc.xlsdownload(xls_url, silent=True) rows = sc.parse_xls(xls, header_row=2) is_first = True for row in rows: if not isinstance(row['Date'], datetime.datetime): continue if not is_first: print('-' * 10) is_first = False print('VD') sc.timestamp() print('Downloading:', html_url) print('Date and time:', row['Date'].date().isoformat()) print('Confirmed cases:', row['Nombre total de cas confirmés positifs']) print('Hospitalized:', row['Hospitalisation en cours']) print('ICU:', row['Dont soins intensifs']) print('Deaths:', row['Décès'])
def parse_html(): # https://www.vd.ch/toutes-les-actualites/hotline-et-informations-sur-le-coronavirus/point-de-situation-statistique-dans-le-canton-de-vaud/ # includes a content from datawrapper ( https://datawrapper.dwcdn.net/tr5bJ/14/ ), # which provides actual data and table rendering. # Here we instead use datawrapper API directly to fetch the data. main_url = 'https://www.vd.ch/toutes-les-actualites/hotline-et-informations-sur-le-coronavirus/point-de-situation-statistique-dans-le-canton-de-vaud/' url = 'https://api.datawrapper.de/v3/charts/tr5bJ/data' print('Downloading:', main_url) # The bearer authentication token provided by Alex Robert ( https://github.com/AlexBobAlex ) data = requests.get( url, headers={ 'accept': 'text/csv', 'Authorization': 'Bearer 6868e7b3be4d7a69eff00b1a434ea37af3dac1e76f32d9087fc544dbb3f4e229' }) d = data.text # Date Hospitalisations en cours Dont soins intensifs Sortis de l'hôpital Décès Total cas confirmés # 10.03.2020 36 8 5 1 130 # 11.03.2020 38 7 5 3 200 rows = d.split('\n') # Remove empty rows rows = [row for row in rows if len(row.strip())] headers = rows[0].split('\t') assert headers[0:6] == [ "Date", "Hospitalisations en cours", "Dont soins intensifs", "Sortis de l'hôpital", "Décès", "Total cas confirmés" ], f"Table header mismatch: Got: {headers}" is_first = True for row in rows: if not is_first: print('-' * 10) is_first = False cells = row.split('\t') print('VD') sc.timestamp() print('Downloading:', main_url) print('Date and time:', cells[0]) print('Confirmed cases:', cells[5]) print('Deaths:', cells[4]) print('Hospitalized:', cells[1]) print('ICU:', cells[2]) if cells[3].isnumeric(): print('Recovered:', cells[3])
#!/usr/bin/env python3 # -*- coding: utf-8 -*- import scrape_common as sc xls_url = 'https://www.ne.ch/autorites/DFS/SCSP/medecin-cantonal/maladies-vaccinations/Documents/Covid-19-Statistiques/COVID19_PublicationInternet.xlsx' xls = sc.xlsdownload(xls_url, silent=True) rows = sc.parse_xls(xls) for i, row in enumerate(rows): print('NE') sc.timestamp() print('Downloading:', xls_url) print('Date and time:', row['A'].date().isoformat()) print('Confirmed cases:', row['Cumul']) print('Hospitalized:', row['Total des cas hospitalisés']) if row['Soins intensifs (intubés)'] and row[ 'Soins intensifs (non intubés)']: print( 'ICU:', int(row['Soins intensifs (intubés)']) + int(row['Soins intensifs (non intubés)'])) print('Vent:', row['Soins intensifs (intubés)']) print('Deaths:', row['Cumul des décès']) # do not print record delimiter for last record # this is an indicator for the next script to check # for expected values. if len(rows) - 1 > i: print('-' * 10)