Пример #1
0
    assert district in district_ids, f'District {district} is unknown!'

# Fetch the canton Bern (BE) "besondere Lage" page and parse the
# per-district case table out of its HTML.

# start getting and parsing the data
html_url = 'https://www.besondere-lage.sites.be.ch/besondere-lage_sites/de/index/corona/index.html'
d = sc.download(html_url, silent=True)
# Normalize the page's special spaces to plain spaces so the regexes
# below match — NOTE(review): the first argument appears to be a
# non-breaking space (U+00A0); confirm it survived copy/paste.
d = d.replace(' ', ' ')
soup = BeautifulSoup(d, 'html.parser')
# The case table is located via its German "summary" attribute.
tbody = soup.find('table', {'summary': 'Laufend aktualisierte Zahlen zu den Corona-Erkrankungen im Kanton Bern'}).find_next('tbody')
for row in tbody.find_all('tr'):
    tds = row.find_all('td')
    # First cell: report date in dd.mm.yyyy form.
    date_str = sc.find(r'(\d+\.\d+\.\d+)', tds[0].text)
    date = sc.date_from_text(date_str)

    # Pre-create one zeroed DistrictData per district for this date;
    # counts parsed from the table below are expected to fill them in.
    dds = {}
    # NOTE(review): `district` is bound twice in this unpacking; the
    # second binding (from inhabitants) wins. This assumes district_ids
    # and inhabitants have identical, identically-ordered keys — confirm.
    for (district, d_id), (district, population) in zip(district_ids.items(), inhabitants.items()):
        dd = sc.DistrictData(district=district, canton='BE')
        dd.url = html_url
        dd.district_id = d_id
        dd.population = population
        dd.date = date.isoformat()
        dd.new_cases = 0
        dds[district] = dd

    # Third cell: newline-separated "<count> <municipality>" entries.
    content = tds[2].text.strip()
    # Re-join hyphenated names split across a <br/> (e.g. "Munchen-\nbuchsee").
    content = re.sub(r'-\n(\w)', r'-\1', content)
    # Re-join non-hyphen line breaks where the next line starts with a
    # letter instead of a count. NOTE(review): the '|' inside the
    # character class is a literal pipe, likely unintended (harmless here).
    content = re.sub(r'\n([A-Z|a-z])', r' \1', content)
    for item in content.split('\n'):
        res = re.match(r'(\d+) (.*)', item)
        assert res is not None, f'Unexpected item {item} for number / city'
Пример #2
0
# Collect calendar-week numbers from the table header row and pair each
# week with its year: weeks above 50 belong to late 2020, the rest to 2021.
for header in trs[0]:
    week = sc.find(r'Woche (\d+)', header.string)
    if week is None:
        continue
    weeks.append(week)
    years.append('2020' if int(week) > 50 else '2021')

# One table row per district; columns 1..n hold the weekly new-case
# counts matching the header weeks collected above.
for tr in trs[1:]:
    tds = tr.find_all('td')
    district = tds[0].string
    if district not in inhabitants:
        continue
    for col, (wk, yr) in enumerate(zip(weeks, years), start=1):
        dd = sc.DistrictData(canton='FR', district=district)
        dd.url = url
        dd.week = wk
        # TODO restore once all weeks are in 2021
        # dd.year = '20' + year
        dd.year = yr
        dd.new_cases = tds[col].string
        dd.population = inhabitants[district]
        dd.district_id = district_ids[district]
        print(dd)


# daily data from xls
# Fetch the FR Excel workbook (and its source URLs), then parse it using
# the first row as the column header.
xls_url, xls, main_url = get_fr_xls()
rows = sc.parse_xls(xls, header_row=0)
for row in rows:
Пример #3
0
# Cumulative case counts per BL district accrued before the first row of
# the downloaded time series; added to each row's counts below so that
# total_cases is an absolute figure.
initial_cases = {
    'Arlesheim': 528,
    'Laufen': 65,
    'Liestal': 177,
    'Sissach': 81,
    'Waldenburg': 15,
}

# Sort rows by their date key so iteration runs oldest -> newest; the
# running new-case computation below depends on that order.
ordered_rows = OrderedDict(sorted(rows.items()))

for district, district_id in district_ids.items():
    # Seed the running total with the pre-series cumulative count.
    # Arlesheim's total on 2020-05-31 was 527, one below its seed of 528.
    prev_total = 527 if district == 'Arlesheim' else initial_cases[district]

    for row_date, row in ordered_rows.items():
        dd = sc.DistrictData(canton='BL', district=district)
        dd.district_id = district_id
        dd.population = population[district]
        dd.url = main_url
        dd.date = row['date']
        dd.total_cases = initial_cases[district] + row[district]
        # New cases = difference between consecutive cumulative totals.
        dd.new_cases = dd.total_cases - prev_total
        prev_total = dd.total_cases
        print(dd)
Пример #4
0
import csv
from io import StringIO
import requests
import scrape_common as sc

# perma link to TG COVID dataset on opendata.swiss
response = requests.get(
    'https://opendata.swiss/api/3/action/ogdch_dataset_by_identifier',
    params={'identifier': 'gesundheit_04-2020_stat@kanton-thurgau'})
dataset = response.json()['result']

# Pick the district-level resource; raises StopIteration if absent.
resource = next(
    res for res in dataset['resources']
    if res['name']['de'] == 'COVID19 Fallzahlen Kanton Thurgau auf Ebene Bezirk')

assert resource['download_url'], "Download URL not found"

d_csv = sc.download(resource['download_url'], silent=True, encoding='latin1')

# The CSV is semicolon-delimited with one row per district and week.
reader = csv.DictReader(StringIO(d_csv), delimiter=';')
for row in reader:
    dd = sc.DistrictData(canton='TG')
    dd.district_id = row['DistrictId']
    dd.district = row['District']
    dd.population = row['Population']
    dd.week = row['Week']
    dd.year = row['Year']
    dd.new_cases = row['NewConfCases']
    dd.url = resource['download_url']
    print(dd)
Пример #5
0
# Per-district population figures for canton VS, indexed in lockstep
# with the `districts` and `district_ids` sequences used in the loop
# below (13 entries, one per district value).
population = [
    4440,
    10930,
    26910,
    28650,
    12360,
    49230,
    10860,
    47750,
    28910,
    47980,
    15260,
    13830,
    46840,
]


# Emit one DistrictData record per VS district; `district_values` must
# line up one-to-one with the 13 districts.
assert len(district_values) == 13, f'expected 13 district values, but got {len(district_values)} for {url}'
# enumerate replaces the original manual `i = 0` / `i += 1` counter.
for i, value in enumerate(district_values):
    dd = sc.DistrictData(canton='VS', district=districts[i])
    dd.url = url
    dd.district_id = district_ids[i]
    dd.population = population[i]
    dd.week = week
    dd.year = year
    dd.new_cases = value
    print(dd)
Пример #6
0
    241: 'Jura bernois',
    242: 'Biel/Bienne',
    243: 'Seeland',
    244: 'Oberaargau',
    245: 'Emmental',
    246: 'Bern-Mittelland',
    247: 'Thun',
    248: 'Obersimmental-Saanen',
    249: 'Frutigen-Niedersimmental',
    250: 'Interlaken-Oberhasli',
}

# Derive weekly new-case counts for the BE administrative districts from
# the published 7-day-incidence CSV.
url = 'https://covid-kennzahlen.apps.be.ch/#/de/cockpit'
csv_url = 'https://raw.githubusercontent.com/openDataBE/covid19Data/develop/7_d_inzidenz_verwaltungskreis.csv'
d = sc.download(csv_url, silent=True)
reader = csv.DictReader(StringIO(d), delimiter=',')
for row in reader:
    district_id = int(row['bfs_nummer'])
    dd = sc.DistrictData(district=district_ids[district_id], canton='BE')
    dd.url = url
    dd.district_id = district_id
    dd.population = row['einwohnerzahl']
    date = sc.date_from_text(row['datum'])
    # Use the ISO week-year together with the ISO week: around New Year
    # the calendar year and the ISO week-year differ (e.g. 2021-01-01 is
    # in ISO week 53 of 2020), so pairing isocalendar()[1] with
    # date.year would mislabel those rows.
    iso_year, iso_week, _ = date.isocalendar()
    dd.week = iso_week
    dd.year = iso_year
    # The CSV carries a 7-day incidence per 100 000 inhabitants; convert
    # back to an absolute, rounded case count.
    dd.new_cases = round(
        float(row['7_d_inzidenz']) / 100e3 * int(row['einwohnerzahl']))
    print(dd)
Пример #7
0
    'Rheintal': 1723,
    'Werdenberg': 1724,
    'Sarganserland': 1725,
    'See-Gaster': 1726,
    'Toggenburg': 1727,
    'Wil': 1728,
}

url = 'https://www.sg.ch/ueber-den-kanton-st-gallen/statistik/covid-19/_jcr_content/Par/sgch_downloadlist/DownloadListPar/sgch_download.ocFile/KantonSG_C19-Faelle_download.csv'
d = sc.download(url, silent=True)

# Drop the first five description lines so DictReader sees the real header.
lines = d.split("\n")
d = "\n".join(lines[5:])

reader = csv.DictReader(StringIO(d), delimiter=';')
for row in reader:
    # Week number like "W12" and the exact case date, both per row.
    week = sc.find(r'W(\d+)', row['Kalenderwoche'])
    date = sc.date_from_text(row['Falldatum'])

    # One record per Wahlkreis (district) for this date.
    for district, inh in inhabitants.items():
        col = 'Wahlkreis ' + district
        dd = sc.DistrictData(canton='SG', district=district)
        dd.url = url
        dd.week = week
        dd.year = date.year
        dd.date = date.isoformat()
        dd.district_id = district_ids[district]
        dd.new_cases = row[col]
        dd.total_cases = row[col + ' (kumuliert)']
        dd.population = inh
        print(dd)
Пример #8
0
    'Plessur': 1848,
    'Prättigau/Davos': 1849,
    'Surselva': 1850,
    'Viamala': 1851,
}

limit = '100'
url = 'https://services1.arcgis.com/YAuo6vcW85VPu7OE/arcgis/rest/services/Fallzahlen_Pro_Region/FeatureServer/0/query?f=json&where=Datum%3E%3Dtimestamp%20%272020-02-01%2000%3A00%3A00%27&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&orderByFields=Region%20asc&resultOffset=0&resultRecordCount=10000&resultType=standard&cacheHint=true'

resp = requests.get(url=url)
json_data = resp.json()

print(sc.DistrictData.header())

# Each ArcGIS feature carries one region/date observation in 'attributes'.
for feature in json_data['features']:
    record = feature['attributes']

    dd = sc.DistrictData(canton='GR', district=record['Region'])
    dd.url = url
    # 'Datum' is an epoch timestamp in milliseconds.
    date = datetime.datetime.utcfromtimestamp(record['Datum'] / 1000)
    dd.date = date.date().isoformat()
    dd.total_cases = record['Faelle__kumuliert_']
    dd.new_cases = record['Neue_Faelle']
    dd.total_deceased = record['Verstorbene__kumuliert_']
    dd.new_deceased = record['Verstorbene']
    # Population / id lookups are best-effort: only set when the region
    # name is known, otherwise the attribute stays untouched.
    if dd.district in inhabitants:
        dd.population = inhabitants[dd.district]
    if dd.district in district_ids:
        dd.district_id = district_ids[dd.district]
    print(dd)
Пример #9
0
import os

import db_common as dc
import scrape_common as sc

# Directory that holds the sqlite database file (resolved by db_common).
__location__ = dc.get_location()

# Counter for stdin lines that could not be processed — presumably
# incremented on parse/insert failures further down; the increment is
# outside this view, confirm before relying on it.
input_failures = 0

try:
    DATABASE_NAME = os.path.join(__location__, 'data.sqlite')
    conn = sqlite3.connect(DATABASE_NAME)

    i = 0
    for line in sys.stdin:
        dd = sc.DistrictData()
        if dd.parse(line.strip()):
            c = conn.cursor()
            try:
                print(dd)

                c.execute(
                    '''
                    INSERT INTO data (
                      DistrictId,
                      District,
                      Canton,
                      Date,
                      Week,
                      Year,
                      Population,
Пример #10
0
#cv2.waitKey(0)
# Run Tesseract over the prepared image; per the Tesseract docs,
# --psm 6 assumes a single uniform block of text, --oem 3 uses the
# default engine mode.
custom_config = '--oem 3 --psm 6'
text_in_img = pytesseract.image_to_string(img, config=custom_config)

# delete the temp img file
os.remove(path)

def parse_line(line):
    """Extract (population, total_cases) from one OCR'd table line.

    The last two whitespace-separated tokens are parsed as integers
    after mapping common OCR misreads (O->0, B->8, F->7, T->7) and
    stripping thousands apostrophes from the population token.
    Returns (None, None) when the line does not look like a table row.
    """
    ocr_fixes = str.maketrans("OBFT", "0877")
    m = re.match(r'^(.*)\s+(?:[_-]\s+)?(\S+)\s+(\S+)\s+(\S+)$', line)
    if m is None:
        return (None, None)
    population = int(m[3].replace("'", "").translate(ocr_fixes))
    total_cases = int(m[4].translate(ocr_fixes))
    return (population, total_cases)

# Match each configured district's pattern against the OCR text and emit
# one DistrictData record per district.
for name, config in districts.items():
    for line in text_in_img.split('\n'):
        dd = sc.DistrictData(canton='AG', district=name)
        dd.district_id = config['district_id']
        dd.url = data_url
        if re.search(config['pattern'], line, flags=re.I):
            population, total_cases = parse_line(line)
            # OCR sanity check: the parsed population must equal the
            # configured reference value for this district.
            assert population == config['population'], f"Population number for {name} does not match, {population} != {config['population']}"
            dd.date = img_date.date().isoformat()
            dd.population = population
            dd.total_cases = total_cases
            break
    # NOTE(review): `dd` is re-created on every inner iteration, so this
    # assert only detects a missing district if sc.DistrictData defines
    # truthiness based on its contents; otherwise an empty record slips
    # through — confirm DistrictData.__bool__.
    assert dd, f"No data found for district {name}, Text: {text_in_img}"
    print(dd)