Пример #1
0
def sanitize_row(row):
    """Blank out non-integer values in the count columns of ``row``.

    The source data contains placeholder entries such as 'Non communiqué'
    (seen on 2020-12-04). Every checked column whose value is rejected by
    ``sc.represents_int`` is replaced with an empty string.

    The row is modified in place and also returned.
    """
    count_columns = (
        'Nombre de cas actuellement hospitalisés',
        'Nombre de cas actuellement en soins intensifs',
        'Nombre de nouveaux décès',
    )
    for column in count_columns:
        if sc.represents_int(row.get(column)):
            continue
        row[column] = ''
    return row
Пример #2
0
    if data:
        # nothing to do here
        continue

    # we should never reach here unless there is an unknown iframe
    raise Exception(f"issue parsing data in iframe {iframe_url}")

# Walk the rows in ascending key order so that the most recent entry is
# printed last.
ordered_rows = OrderedDict(sorted(rows.items()))
for row in ordered_rows.values():
    # separator line between entries, but not before the first one
    if is_first:
        is_first = False
    else:
        print('-' * 10)

    dd = sc.DayData(canton='BL', url=main_url)
    dd.datetime = row['date']
    # these DayData fields share their name with the row key they come from
    for field in ('cases', 'hospitalized', 'icu', 'vent', 'deaths',
                  'recovered', 'quarantined', 'quarantine_riskareatravel'):
        setattr(dd, field, sc.safeint(row.get(field)))

    # the quarantine total is only known when both parts are integers
    contacts = dd.quarantined
    travellers = dd.quarantine_riskareatravel
    if sc.represents_int(contacts) and sc.represents_int(travellers):
        dd.quarantine_total = contacts + travellers

    dd.isolated = sc.safeint(row.get('isolated'))
    print(dd)
Пример #3
0
    r'befanden\ssich\s(\d+)\spositive\sF.lle\snoch\simmer\sin\sIsolation',
    content)
# Contacts in quarantine, searched for in the phrase
# "Isolation und <n> Kontakte in Quarantäne"; the `.` in `Quarant.ne`
# accepts any single character in place of the umlaut.
# NOTE(review): presumably sc.find returns the captured group - confirm.
dd.quarantined = sc.find(r'Isolation\sund\s(\d+)\sKontakte\sin\sQuarant.ne',
                         content)
# Travellers in quarantine, searched for in "<n> Reisende in Quarantäne".
dd.quarantine_riskareatravel = sc.find(r'\s(\d+)\sReisende\sin\sQuarant.ne',
                                       content)
print(dd)

# Download the Valais workbook and build one DayData record per usable row.
xls_url = 'https://raw.githubusercontent.com/statistikZH/covid19_drop/master/Chiffres%20%20COVID-19%20Valais.xlsx'
main_url = 'https://www.vs.ch/de/web/coronavirus'
xls = sc.xlsdownload(xls_url, silent=True)
rows = sc.parse_xls(xls, header_row=1)
for i, row in enumerate(rows):
    # skip rows whose 'Date' cell is not a datetime
    if not isinstance(row['Date'], datetime.datetime):
        continue
    # skip rows without an integer cumulative case count
    if not sc.represents_int(row['Cumul cas positifs']):
        continue
    # skip rows that have neither new cases nor new hospital admissions
    if row['Nb nouveaux cas positifs'] is None and row[
            "Nb nouvelles admissions à l'hôpital"] is None:
        continue

    # the source URL of the record is the canton's web page, not the xls
    dd = sc.DayData(canton='VS', url=main_url)
    dd.datetime = row['Date'].date().isoformat()
    dd.cases = row['Cumul cas positifs']
    dd.hospitalized = row['Total hospitalisations COVID-19']
    dd.new_hosp = row['Nb nouvelles admissions à l\'hôpital']
    dd.icu = row['Patients COVID-19 aux SI total (y.c. intubés)']
    dd.vent = row['Patients COVID-19 intubés']
    dd.deaths = row['Cumul décès COVID-19']
    # The sheet only lists new discharges per row, so the cumulative value is
    # the sum over all rows up to and including the current one.
    # NOTE(review): only the current row is checked for None; an earlier row
    # with an empty cell would make sum() raise TypeError - confirm the data.
    if row['Nb de nouvelles sorties'] is not None:
        dd.recovered = sum(r['Nb de nouvelles sorties'] for r in rows[:i + 1])
Пример #4
0
# The first text node containing "Stand" after the table holds the date of
# the isolation/quarantine figures.
stand_node = isolated_table.find_next(string=re.compile("Stand"))
isolated_date = stand_node.string
dd_isolated.datetime = sc.find(r'Stand:?\s*(.+[0-9]{4})', isolated_date)

rows = isolated_table.find_all('tr')

# Guard against layout changes: the header row must have exactly two columns
# and the second one must be titled "Anzahl".
header_row = rows[0]
headers = header_row.find_all('td') or header_row.find_all('th')
assert len(
    headers) == 2, f"Number of header columns changed, {len(headers)} != 2"
assert headers[1].text.strip() == "Anzahl"

# row label -> DayData attribute that receives the count of that row
label_to_field = {
    'Positiv Getestete im Tracing / in Isolation': 'isolated',
    'Kontaktpersonen im Tracing / in Quarantäne': 'quarantined',
}
for table_row in rows[1:]:
    cells = table_row.find_all('td')
    field = label_to_field.get(cells[0].text.strip())
    if field is None:
        continue
    value = cells[1].text.strip()
    if sc.represents_int(value):
        setattr(dd_isolated, field, int(value))

if dd_isolated:
    print(dd_isolated)
    print('-' * 10)

# historized cases
# Fetch the case CSV ("KantonSG_C19-Faelle_download.csv") from sg.ch; the
# downloaded content is kept in `d` for the processing that follows.
csv_url = 'https://www.sg.ch/ueber-den-kanton-st-gallen/statistik/covid-19/_jcr_content/Par/sgch_downloadlist/DownloadListPar/sgch_download.ocFile/KantonSG_C19-Faelle_download.csv'
d = sc.download(csv_url, silent=True)

# strip the "header" / description lines
Пример #5
0
    'https://www.jura.ch/fr/Autorites/Coronavirus/Chiffres-H-JU/Evolution-des-cas-COVID-19-dans-le-Jura.html',
    silent=True)

# Locate the XLSX download link on the page, fetch the workbook and print one
# DayData record per dated row.
soup = BeautifulSoup(d, 'html.parser')
box = soup.find('li', class_="ico-xlsx")
xls_url = box.find('a').get('href')
assert xls_url, "URL is empty"
# the page may link the file with a site-relative URL
if not xls_url.startswith('http'):
    xls_url = f'https://www.jura.ch{xls_url}'

xls = sc.xlsdownload(xls_url, silent=True)

rows = sc.parse_xls(xls, header_row=0)
is_first = True
# The sheet only lists *new* deaths per row, so the cumulative count is kept
# as a running total. Rows whose entry is not an integer (empty cells, text
# placeholders) are left out of the total instead of breaking the addition,
# which the previous `sum(...)` over all earlier rows did.
total_deaths = 0
for row in rows:
    new_deaths = row.get('Nombre de nouveaux décès')
    has_new_deaths = sc.represents_int(new_deaths)
    if has_new_deaths:
        # counted before the date check: every row of the sheet contributes
        total_deaths += int(new_deaths)

    # skip rows whose 'Date' cell is not a datetime
    if not isinstance(row['Date'], datetime.datetime):
        continue

    # separator line between entries, but not before the first one
    if not is_first:
        print('-' * 10)
    is_first = False

    dd = sc.DayData(canton='JU', url=xls_url)
    dd.datetime = row['Date'].date().isoformat()
    dd.cases = row['Cumul des cas confimés']
    dd.hospitalized = row.get('Nb cas actuellement hospitalisés')
    dd.icu = row.get('Nb cas actuellement en SI')
    # deaths are only reported for rows that carry a usable value themselves
    if has_new_deaths:
        dd.deaths = total_deaths
    print(dd)
Пример #6
0
    if dd:
        if not is_first:
            print('-' * 10)
        is_first = False
        print(dd)

# get cases xls
# Click the 'Indicateurs principaux' link, then read the XLS download link
# from the 'download_table_indicateurs' element.
driver.find_element_by_link_text('Indicateurs principaux').click()
case_xls_url = sgc.get_link_from_element(driver, 'download_table_indicateurs')
assert case_xls_url, "Couldn't find cases XLS url"

xls = sc.xlsdownload(case_xls_url, silent=True)
rows = sc.parse_xls(xls, header_row=0)
for row in rows:
    dd = sc.DayData(canton='GE', url=url)
    dd.datetime = row['Date']
    dd.cases = row['Cumul cas COVID-19']

    # take over the hospitalisation figure only if it is a non-negative integer
    current_hosp = row[
        'Total hospitalisations COVID-19 actifs (en cours) canton (HUG-cliniques)']
    hosp_is_valid = sc.represents_int(current_hosp) and int(current_hosp) >= 0
    if hosp_is_valid:
        dd.hospitalized = current_hosp

    dd.icu = row['Patients COVID-19 actifs aux soins intensifs HUG']
    dd.icf = row['Patients COVID-19 actifs aux soins intermédiaires HUG']
    dd.deaths = row['Cumul décès COVID-19 ']

    # falsy records are not printed
    if not dd:
        continue
    # separator line between entries, but not before the first one
    if is_first:
        is_first = False
    else:
        print('-' * 10)
    print(dd)
Пример #7
0
            print('-' * 10)
        is_first = False
        print(dd)

# cases + hospitalization
rows = sc.parse_xls(xls, sheet_name='1. Covid-19-Daten', header_row=2)
for row in rows:
    # column 'A' holds the timestamp; rows without a datetime are skipped
    timestamp = row['A']
    if not isinstance(timestamp, datetime.datetime):
        continue

    dd = sc.DayData(canton='AG', url=xls_url)
    dd.datetime = f"{timestamp.date().isoformat()} {timestamp.time().isoformat()}"
    dd.cases = row['Gesamtzahl']

    ward = row['Bestätigte Fälle auf Abteilung (ohne IPS/IMC)']
    icu = row['Bestätigte Fälle Intensivpflegestation (IPS)']
    icf = row['Bestätigte Fälle Intermediate Care (IMC)']
    # Hospitalisation figures are only set when all three parts are integers;
    # the total is ward + intensive care + intermediate care.
    hospital_parts = (ward, icu, icf)
    if all(sc.represents_int(part) for part in hospital_parts):
        dd.hospitalized = sum(int(part) for part in hospital_parts)
        dd.icu = icu
        dd.icf = icf
    dd.deaths = row['Gesamtzahl16']
    dd.recovered = row['Gesamtzahl20']

    # falsy records are not printed
    if not dd:
        continue
    # separator line between entries, but not before the first one
    if is_first:
        is_first = False
    else:
        print('-' * 10)
    print(dd)
Пример #8
0
# The "Stand" timestamp of the contact tracing section has appeared in several
# formats; try the patterns in order and keep the first truthy result (or the
# result of the last pattern if none matches).
timestamp_patterns = (
    r'Contact\s+tracing\s+\(.*?Stand\:?\s+(.+?Uhr).*?\)',
    r'Contact\s+tracing.*Stand\:? (.+? Uhr).*?\)',
    r'Contact\s+tracing.*Stand ([0-9]+\.[0-9]+\.? \/ [0-9]+h)',
)
for timestamp_pattern in timestamp_patterns:
    t = sc.find(timestamp_pattern, d)
    if t:
        break
dd_ct.datetime = t

isolated_pattern = r'Aktuell\s+COVID-19-Erkrankte\s+in\s+Isolation:\s+<strong>\s?(\d+)\s?</strong>'
dd_ct.isolated = sc.find(isolated_pattern, d)

total_pattern = r'Aktuell\s+im\s+Kanton\s+wohnhafte\s+(?:Kontaktpersonen|Personen)\s+in\s+Quarantäne:\s?<strong>\s?(\d+)\s?</strong>'
quarantined_total = sc.find(total_pattern, d)

travel_pattern = r'davon\s+Anzahl\s+Personen.*die\s+aus\s+einem\s+<strong>Risikogebiet</strong>\s+in\s+die\s+Schweiz\s+eingereist\s+sind\s+und\s+aufgrund\s+dessen\s+aktuell\s+im\s+Kanton\s+in\s+Quarantäne\s+sind:\s+<strong>\s*(\d+)</strong>'
quarantined_travel = sc.find(travel_pattern, d)

# the travel figure is mandatory, it is needed for the subtraction below
assert sc.represents_int(
    quarantined_travel
), f"quarantined_travel is not an integer: {quarantined_travel}"

# The page reports the total and the travel-related part ("davon"); the
# remaining quarantined persons are derived as total minus travel.
if sc.represents_int(quarantined_total):
    dd_ct.quarantine_total = quarantined_total
    quarantined = int(quarantined_total) - int(quarantined_travel)
    assert quarantined >= 0, f"Quarantined is negative: {quarantined}"
    dd_ct.quarantined = quarantined

dd_ct.quarantine_riskareatravel = quarantined_travel

if dd_ct:
    print(dd_ct)
    print('-' * 10)

# cases
Пример #9
0
    # separator line between entries, but not before the first one
    if not is_first:
        print('-' * 10)
    is_first = False

    print('SH')
    sc.timestamp()
    print('Downloading:', xls_url)
    # 'Uhrzeit' may be a datetime, some other truthy value (printed as is) or
    # empty; the printed date format differs between the three cases.
    if isinstance(row['Uhrzeit'], datetime.datetime):
        print('Date and time:', row['Datum'].date().isoformat(), row['Uhrzeit'].time().isoformat())
    elif row['Uhrzeit']:
        print('Date and time:', row['Datum'].strftime('%d.%m.%Y'), row['Uhrzeit'])
    else:
        print('Date and time:', row['Datum'].date().isoformat())

    print('Confirmed cases:', row['Positiv'])
    # Hospitalized = isolated + intensive care, printed only if both are integers.
    # NOTE(review): the same column is read by exact key in the check and via
    # row.search() in the sum - presumably row.search looks a column up by a
    # regex on its header; confirm that both resolve to the same cell.
    if sc.represents_int(row['Hospitalisation isoliert\nbestätigt']) and sc.represents_int(row['Hospitalisiert_Intensiv']):
        print('Hospitalized:', (row.search(r'Hospitalisation isoliert\s+bestätigt.*$') + row['Hospitalisiert_Intensiv']))
        print('ICU:', row['Hospitalisiert_Intensiv'])
    if row['Verstorben'] is not None:
        print('Deaths:', row['Verstorben'])

    # the remaining figures are optional and only printed when present
    isolated = row.search(r'Anzahl Personen\s+in Isolation.*')
    if isolated is not None:
        print('Isolated:', isolated)
    quarantined = row.search(r'Anzahl Personen\s+in Quarantäne\s+.*Kontaktpersonen.*')
    if quarantined is not None:
        print('Quarantined:', quarantined)
    quarantined_risk = row.search(r'Anzahl Personen\s+in Quarantäne\s+.*Rückkehr.*Risikoländer.*')
    if quarantined_risk is not None:
        print('Quarantined risk area travel:', quarantined_risk)
Пример #10
0
    is_first = False

    print('SH')
    sc.timestamp()
    print('Downloading:', main_url)
    # 'Uhrzeit' may be a datetime, some other truthy value (printed as is) or
    # empty; the printed date format differs between the three cases.
    if isinstance(row['Uhrzeit'], datetime.datetime):
        print('Date and time:', row['Datum'].date().isoformat(),
              row['Uhrzeit'].time().isoformat())
    elif row['Uhrzeit']:
        print('Date and time:', row['Datum'].strftime('%d.%m.%Y'),
              row['Uhrzeit'])
    else:
        print('Date and time:', row['Datum'].date().isoformat())

    print('Confirmed cases:', row['Positiv'])
    # Hospitalized = isolated + intensive care, printed only if both are
    # integers.
    # NOTE(review): presumably row.search looks a column up by a regex on its
    # header, which tolerates changing header texts - confirm.
    if sc.represents_int(row.search(
            r'Hospitalisation isoliert\s+bestätigt.*$')) and sc.represents_int(
                row.search(r'Hospitalisiert.*Intensiv.*$')):
        print('Hospitalized:',
              (row.search(r'Hospitalisation isoliert\s+bestätigt.*$') +
               row.search(r'Hospitalisiert.*Intensiv.*$')))
        print('ICU:', row.search(r'Hospitalisiert.*Intensiv.*$'))
    if row['Verstorben'] is not None:
        print('Deaths:', row['Verstorben'])

    # the remaining figures are optional and only printed when present
    isolated = row.search(r'Anzahl Personen\s+in Isolation.*')
    if isolated is not None:
        print('Isolated:', isolated)
    quarantined = row.search(
        r'Anzahl Personen\s+in Quarantäne\s+.*Kontaktpersonen.*')
    if quarantined is not None:
        print('Quarantined:', quarantined)
Пример #11
0
# Download the workbook referenced by `meta` and print one record per dated
# row that carries at least one figure.
xls_url = f"https://sh.ch{meta['url']}"
xls = sc.xlsdownload(xls_url, silent=True)

rows = sc.parse_xls(xls, header_row=0)
is_first = True
for row in rows:
    # skip rows whose 'Datum' cell is not a datetime
    if not isinstance(row['Datum'], datetime.datetime):
        continue
    # skip rows in which none of the figures holds a truthy value
    if not (row['Positiv'] or row['Hospitalisiert_Iso'] or row['Hospitalisiert_Intensiv'] or row['Verstorben']):
        continue

    # separator line between entries, but not before the first one
    if not is_first:
        print('-' * 10)
    is_first = False

    print('SH')
    sc.timestamp()
    print('Downloading:', xls_url)
    # 'Uhrzeit' may be a datetime, some other truthy value (printed as is) or
    # empty; the printed date format differs between the three cases.
    if isinstance(row['Uhrzeit'], datetime.datetime):
        print('Date and time:', row['Datum'].date().isoformat(), row['Uhrzeit'].time().isoformat())
    elif row['Uhrzeit']:
        print('Date and time:', row['Datum'].strftime('%d.%m.%Y'), row['Uhrzeit'])
    else:
        print('Date and time:', row['Datum'].date().isoformat())

    print('Confirmed cases:', row['Positiv'])
    # Hospitalized = isolated + intensive care, printed only if both are integers
    if sc.represents_int(row['Hospitalisiert_Iso']) and sc.represents_int(row['Hospitalisiert_Intensiv']):
        print('Hospitalized:', (row['Hospitalisiert_Iso'] + row['Hospitalisiert_Intensiv']))
        print('ICU:', row['Hospitalisiert_Intensiv'])
    # A row passes the filter above as soon as any one figure is set, so the
    # deaths cell may still be empty; leave the line out rather than emitting
    # "Deaths: None".
    if row['Verstorben'] is not None:
        print('Deaths:', row['Verstorben'])