#!/usr/bin/env python3

import re

import arrow
from bs4 import BeautifulSoup

import scrape_common as sc


def parse_gr_date(date_str):
    """Convert a 'DD.MM.YYYY' string into a ``datetime.date``."""
    parsed = arrow.get(date_str, 'DD.MM.YYYY', locale='de')
    return parsed.datetime.date()


# Page reported as the source of each record.
url = 'https://www.gr.ch/DE/institutionen/verwaltung/djsg/ga/coronavirus/info/impfen/Seiten/impfen.aspx'
# JSON feed that is actually downloaded; it yields one entry per reporting date.
hist_url = 'https://www.gr.ch/DE/institutionen/verwaltung/djsg/ga/coronavirus/_layouts/15/GenericDataFeed/feed.aspx?PageID=30&ID=g_dbea8372_ed27_48e8_b2c8_b1f7a9643675&FORMAT=JSONRAW'

records = sc.download_json(hist_url)
for record in records:
    first = int(record['verimpft1'])
    second = int(record['verimpft2'])

    vd = sc.VaccinationData(canton='GR', url=url)
    vd.date = parse_gr_date(record['Stand'])
    vd.first_doses = first
    vd.second_doses = second
    # The feed carries no total of its own here; it is the sum of both doses.
    vd.total_vaccinations = first + second
    print(vd)
# Jura (JU): find the vaccination PDF linked from the statistics page and
# read the report date and the total number of injections from its first page.
base_url = 'https://www.jura.ch'
url = f'{base_url}/fr/Autorites/Coronavirus/Infos-Actualite/Statistiques-COVID/Evolution-des-cas-COVID-19-dans-le-Jura.html'
d = sc.download(url)
# NOTE(review): as it appears here both arguments are a plain space, which
# makes this call a no-op; presumably the first argument was meant to be a
# non-breaking space or '&nbsp;' -- confirm against the original file.
d = d.replace(' ', ' ')
soup = BeautifulSoup(d, 'html.parser')

# First <a> whose title matches "Donn.es de vaccination"; the '.' tolerates
# any single character in place of the accented letter.
pdf_url = soup.find('a', title=re.compile(r'Donn.es de vaccination')).get('href')
# Relative links are made absolute against the site root.
if not pdf_url.startswith('http'):
    pdf_url = f'{base_url}{pdf_url}'
# Drop the download query flag from the link.
pdf_url = pdf_url.replace('?download=1', '')

pdf = sc.download_data(pdf_url)
pages = sc.pdfinfo(pdf)

vd = sc.VaccinationData(canton='JU', url=pdf_url)

content = sc.pdf_to_text(pdf, page=1, raw=True)
# Remove an apostrophe sitting between two digit groups (presumably a
# thousands separator, e.g. 1'234 -> 1234) so the numbers match \d+ below.
content = re.sub(r'(\d+)\'(\d+)', r'\1\2', content)

# Two dates separated by one whitespace character; the second one is used.
res = re.search(r'\d+\.\d+\.\d{4}\s(\d+\.\d+\.\d{4})', content)
assert res
vd.date = parse_ju_date(res[1])

# The number that directly precedes the "Nombre d'injection" label.
res = re.search(r'(\d+)\s+Nombre d\'injection', content)
assert res
# Stored as the matched digit string, not converted to int.
vd.total_vaccinations = res[1]

# Load page 2 and apply the same apostrophe clean-up.
content = sc.pdf_to_text(pdf, page=2, raw=True)
content = re.sub(r'(\d+)\'(\d+)', r'\1\2', content)
csv_url = 'https://www.zg.ch/behoerden/gesundheitsdirektion/statistikfachstelle/daten/themen/result-themen-14-03-12.csv'
d_csv = sc.download(csv_url)

# Sample of the downloaded CSV (one row per date and measure):
#
#   "Datum","Typ","Anzahl","Meta","Type","Content"
#   "23.12.2020","Total verimpfte Dosen","250",NA,NA,NA
#   "24.12.2020","Total verimpfte Dosen","250",NA,NA,NA

# Maps the value of the "Typ" column to the VaccinationData attribute it
# fills; rows with any other "Typ" are ignored.
attribute_by_typ = {
    'Total verimpfte Dosen': 'total_vaccinations',
    'Total 1. Impfung': 'first_doses',
    'Total 2. Impfung': 'second_doses',
}

reader = csv.DictReader(StringIO(d_csv), delimiter=',')
data = collections.defaultdict(dict)
for row in reader:
    raw_date = row['Datum']
    if raw_date == 'NA':
        continue

    date = parse_zg_date(raw_date)
    # One VaccinationData per date, created on the first row seen for it.
    if date not in data:
        vd = sc.VaccinationData(canton='ZG', url=csv_url)
        vd.date = date.isoformat()
        data[date] = vd

    attribute = attribute_by_typ.get(row['Typ'])
    if attribute is not None:
        # The count is stored as the raw CSV string.
        setattr(data[date], attribute, row['Anzahl'])

# Emit the records in the order their dates first appeared in the CSV.
for entry in data.values():
    print(entry)
def get_value(row, key):
    """Return the integer stored under *key* in *row*, or None if the cell is empty.

    A cell counts as empty when it is missing from a short row
    (``csv.DictReader`` fills such fields with ``None``), is the empty
    string, or contains only whitespace.

    Raises:
        KeyError: if *key* is not a column of *row*.
        ValueError: if the cell holds non-numeric text.
    """
    value = row[key]
    if value is None or value.strip() == '':
        return None
    return int(value)


# Thurgau (TG): one output record per dated row of the cantonal CSV.
url = 'https://statistik.tg.ch/public/upload/assets/94501/COVID19_Fallzahlen_Kanton_TG.csv'
d_csv = sc.download(url)

reader = csv.DictReader(StringIO(d_csv), delimiter=';')
# Running sum of the per-row 'doses_delivered' values.
total_doses_delivered = 0
for row in reader:
    if not row['date']:
        continue

    # Each row names its own source, which is reported instead of the CSV URL.
    vd = sc.VaccinationData(canton='TG', url=row['source'])
    date = row['date']
    date = parse_tg_date(date)
    vd.date = date.isoformat()
    vd.total_vaccinations = get_value(row, 'total_vaccinations')

    # The cumulative figure is only reported on rows that carry a non-zero
    # delivery; other rows leave vd.doses_delivered untouched.
    doses_delivered = get_value(row, 'doses_delivered')
    if doses_delivered:
        total_doses_delivered += doses_delivered
        vd.doses_delivered = total_doses_delivered

    vd.first_doses = get_value(row, 'first_doses')
    vd.second_doses = get_value(row, 'second_doses')

    # Relies on the truthiness of VaccinationData, which is defined outside
    # this file -- presumably false when no figures were set; confirm.
    if vd:
        print(vd)
# NOTE(review): the next two statements are the tail of a construct that
# begins above this chunk; their original nesting is not visible here.
url = f'{base_url}{url}'
pdf_urls.append(url)

# Valais (VS), daily PDFs: read the report date and the number of
# vaccine doses from page 1 of each collected PDF.
for pdf_url in pdf_urls:
    pdf = sc.download_data(pdf_url)
    content = sc.pdf_to_text(pdf, layout=True, page=1)
    # First DD/MM/20YY date found on the page.
    res = re.search(r'(\d{2}/\d{2}/20\d{2})', content)
    assert res
    date = res[1]
    date = parse_vs_date_daily(date)
    if date.year == 2020:
        # no data available in 2020
        break
    vd = sc.VaccinationData(canton='VS', url=pdf_url)
    vd.date = date.isoformat()
    # The unescaped '.' between the digit groups accepts any single
    # separator character; it is stripped again below.
    res = re.search(r'.*Anzahl\s+der\s+\w+\s+Impfdosen.*\s+(\d+.\d+)\s+', content)
    if not res:
        # no data available in older PDFs
        # (latest are processed first)
        break
    # Keep only the digits of the matched number (stored as a string).
    vd.total_vaccinations = re.sub(r'[^0-9]', '', res[1])
    if vd:
        print(vd)

# Load the weekly statistics page; its processing continues past this chunk.
url = f'{base_url}/web/coronavirus/statistiques-hebdomadaires'
content = sc.download(url)
soup = BeautifulSoup(content, 'html.parser')
return 0 soup = BeautifulSoup(main_site, 'html.parser') for iframe in soup.find_all('iframe'): iframe_url = (iframe['src']) if iframe_url.find('/dbw/264') <= 0: continue d = sc.download(iframe_url) d = d.replace('\n', ' ') res = re.search( r'<pre id="data_1".*?> ?Datum,"Pfizer/BioNTech \(1. Dosis\)","Pfizer/BioNTech \(2. Dosis\)","Moderna \(1. Dosis\)","Moderna \(2. Dosis\)"\s*([^<]+)</pre>', d) assert res data = res[1] if data: for row in data.split(" "): c = row.split(',') assert len(c) == 5, f"Number of fields changed, {len(c)} != 5" vd = sc.VaccinationData('BL', url=main_url) vd.date = parse_row_date(c[0]) vd.first_doses = to_int(c[1]) + to_int(c[3]) vd.second_doses = to_int(c[2]) + to_int(c[4]) vd.total_vaccinations = vd.first_doses + vd.second_doses print(vd) break
#!/usr/bin/env python3

import datetime
import tempfile

import openpyxl

import scrape_common as sc

# Liechtenstein (FL): vaccination figures are published as an Excel workbook
# with one column per reporting date on the 'Impfungen' sheet.
url = 'https://www.llv.li/inhalt/118804/amtsstellen/sonderseite-covid-19'
xls_url = 'https://www.llv.li/files/as/impfungen.xlsx'
xls_data = sc.download_data(xls_url)

# openpyxl reopens the file by name, so the buffered bytes must be flushed to
# disk first. The context manager closes (and removes) the temp file once the
# workbook has been loaded into memory.
with tempfile.NamedTemporaryFile(suffix='.xlsx') as fp:
    fp.write(xls_data)
    fp.flush()
    book = openpyxl.load_workbook(fp.name)

sheet = book['Impfungen']

# Column indices are 1-based and max_column is the last populated column, so
# the range has to end at max_column + 1 to include the most recent date.
for i in range(2, sheet.max_column + 1):
    vd = sc.VaccinationData(canton='FL', url=xls_url)

    # Row 4 holds the date; columns without one are skipped.
    date = sheet.cell(4, i)
    if date.value is None:
        continue
    vd.date = date.value.date().isoformat()

    # Rows 5-7: delivered doses, total vaccinations, second doses.
    vd.doses_delivered = int(sheet.cell(5, i).value)
    vd.total_vaccinations = int(sheet.cell(6, i).value)
    vd.second_doses = int(sheet.cell(7, i).value)
    # First doses are not listed separately; derive them from the total.
    vd.first_doses = vd.total_vaccinations - vd.second_doses

    assert vd
    print(vd)
# Schaffhausen (SH): read first and second doses per date from the downloaded
# workbook. `fp` is created above this chunk -- presumably a named temporary
# file; confirm.
fp.write(xls_data)
# openpyxl reopens the file by name, so flush the buffered bytes to disk first.
fp.flush()
book = openpyxl.load_workbook(fp.name)
sheet = book['Tabelle1']

# Sheet layout (1-based indices).
start_row = 2
date_column = 1
first_dose_column = 12
second_dose_column = 13

# max_row is the last populated row, so the range has to end at max_row + 1
# to include the most recent entry.
for row in range(start_row, sheet.max_row + 1):
    # Rows without a first-dose figure carry no vaccination data.
    value = sheet.cell(row, first_dose_column).value
    if value is None or value == '':
        continue

    try:
        vd = sc.VaccinationData(canton='SH', url=main_url)
        date = sheet.cell(row, date_column)
        vd.date = date.value.date().isoformat()
        vd.first_doses = int(sheet.cell(row, first_dose_column).value)
        vd.second_doses = int(sheet.cell(row, second_dose_column).value)
        vd.total_vaccinations = vd.first_doses + vd.second_doses
        assert vd
        print(vd)
    except (AttributeError, TypeError, ValueError, AssertionError):
        # Best effort: skip rows whose date or dose cells are missing or
        # malformed. Unlike the previous bare `except`, this no longer
        # swallows KeyboardInterrupt / SystemExit.
        continue