Пример #1
0
def _find_row(txt, label, start):
    """Locate the first line containing *label* at or after index *start*.

    Returns a ``(row, pos)`` tuple.  *row* is the text from *label* up to the
    end of its line with every whitespace run collapsed to a single space, and
    *pos* is the index of *label* in *txt*.  When *label* does not occur,
    ``(None, start)`` is returned so the caller can skip the field and keep
    searching from the same position.
    """
    pos = txt.find(label, start)
    if pos == -1:
        return None, start
    end = txt.find('\n', pos)
    if end == -1:
        # The label is on the last line and the text has no trailing newline.
        end = len(txt)
    return re.sub(r'\s+', r' ', txt[pos:end]), pos


def parse_weekly_data(filename):
    """Parse one weekly report PDF and print its test figures as a CSV row.

    The printed columns are: year, week, PCR tests, PCR positivity rate,
    antigen tests, antigen positivity rate, filename.  A figure whose row
    cannot be located in the text is printed as an empty field.

    Args:
        filename: path of the PDF; handed to ``c.pdf_to_text`` and echoed as
            the last column.
    """
    txt = c.pdf_to_text(filename)
    # Join digits that are separated by a single whitespace character
    # (e.g. '12 345' -> '12345') so numbers survive the later split on spaces.
    txt = re.sub(r'(\d)\s(\d)', r'\1\2', txt)

    year = c.search(r'Stand:\s\d+\.\d+\.(\d{4})', txt)
    week = int(c.search(r'Liechtenstein - Woche (\d+) ', txt))

    tot_tests = ''
    tot_antigen_tests = ''
    section_pos = txt.find('Gemeldete Tests')
    if section_pos > 0:
        row, row_pos = _find_row(txt, 'PCR', section_pos)
        if row is not None:
            # The count is the second-to-last space-separated field.
            tot_tests = c.txt_to_int(row.split(' ')[-2])

        # Antigen tests: searched for after the PCR row of the same table.
        row, _ = _find_row(txt, 'Antigen-Schnelltests', row_pos)
        if row is not None:
            tot_antigen_tests = c.txt_to_int(row.split(' ')[-2])

    positivity_rate = ''
    antigen_positivity_rate = ''
    section_pos = txt.find('\nPositivit')
    if section_pos == -1:
        section_pos = txt.find('\nAnteil positiver Tests')
    if section_pos > 0:
        row, row_pos = _find_row(txt, 'PCR', section_pos)
        if row is not None:
            # The rate is the last field, possibly carrying a '%' sign.
            positivity_rate = c.txt_to_float(
                row.split(' ')[-1].replace('%', ''))

        # Antigen tests
        row, _ = _find_row(txt, 'Antigen-Schnelltest', row_pos)
        if row is not None:
            raw_rate = row.split(' ')[-1]
            try:
                # Convert the antigen token itself (not the already-converted
                # PCR rate).
                antigen_positivity_rate = c.txt_to_float(
                    raw_rate.replace('%', ''))
            except Exception:  # best effort: the exceptions raised by c.txt_to_float are not visible here
                # Keep the unparsed token rather than aborting the whole row.
                antigen_positivity_rate = raw_rate

    print(
        f'{year},{week},{tot_tests},{positivity_rate},{tot_antigen_tests},{antigen_positivity_rate},{filename}'
    )
Пример #2
0
def parse_data(filename):
    """Parse one daily report PDF and print its key figures as a CSV row.

    The printed columns are: date, total tests, positivity rate, isolated,
    quarantined, quarantined (travel), filename.  Values that are ``None``
    (or otherwise falsy) are printed as empty fields.

    Args:
        filename: path of the PDF; handed to ``c.pdf_to_text`` and echoed as
            the last column.
    """
    txt = c.pdf_to_text(filename)

    # Preferred form is "Stand <date time> Uhr"; otherwise build the
    # timestamp from separate "Stand:" and "Zeit:" fields.
    date_time = c.search(r'Stand (\d.*) Uhr', txt)
    if date_time is None:
        date = c.search(r'Stand\: (\d{2}\.\d{2}\.20\d{2})', txt)
        time = c.search(r'Zeit: (\d+:\d{2})', txt)
        if date is not None and time is not None:
            date_time = '{} {}'.format(date, time)
    date = c.parse_date(date_time)

    tot_tests = parse_pcr_tot_tests(txt)

    # The positivity rate appears under several wordings; try each pattern in
    # turn until one yields a value.
    positivity_rate = c.txt_to_float(
        c.search(r'Bei (\d+)% dieser Tests fiel das Resultat positiv aus',
                 txt))
    if positivity_rate is None:
        positivity_rate = c.txt_to_float(
            c.search(r'Positivit.tsrate( \*+| \(%\)|\*+)?\s+(\d\.?\d?)[%\s]',
                     txt,
                     index=2))
    if positivity_rate is None:
        positivity_rate = c.txt_to_float(
            c.search(r'Anteil positive Tests \(%\)(\d)?\s+(\d\.?\d?)[%\s]',
                     txt,
                     index=2))

    isolated = c.txt_to_int(
        c.search(
            r'(\d+)\s+(F.lle|Personen aufgrund einer laborbest.tigten COVID-19 Erkrankung)? in\sIsolation',
            txt,
            index=1))
    quarantined = c.txt_to_int(
        c.search(
            r'(\d+)\s?(in|Kontaktpersonen\sin\s.rztlich\sverordneter)? Quarant.ne',
            txt))
    quarantined_travel = None
    if isolated is None or quarantined is None:
        # Fall back to the "Total" row that follows the 'Contact Tracing'
        # heading: isolated, quarantined and (optionally) a third column.
        pos = txt.find('Contact Tracing')
        if pos > 0:
            pcr = re.compile(
                r'Total\s?(\*+|\(%\))?\s+(\d+\s?\d+|\d+)\s+(\d+\s?\d+|\d+)\s+(\d+ ?\d+|\d+)?\n'
            )
            res = pcr.search(txt, pos)
            if res is not None:
                isolated = c.txt_to_int(res[2])
                quarantined = c.txt_to_int(res[3])
                # The third numeric column is optional in the pattern, so its
                # group is None when the row carries only two numbers.
                if res[4] is not None:
                    quarantined_travel = c.txt_to_int(res[4].strip())

    print('{},{},{},{},{},{},{}'.format(date, tot_tests or '', positivity_rate
                                        or '', isolated or '', quarantined
                                        or '', quarantined_travel or '',
                                        filename))
Пример #3
0
def parse_weekly_data(filename):
    """Print week, PCR test count, positivity rate and filename as a CSV row.

    The text of the PDF at *filename* is obtained via ``c.pdf_to_text``.  The
    test count is read from the line following the first 'PCR-Tests'
    occurrence, the positivity rate from the line starting with 'Positivit'
    (or 'Anteil positive Tests').  Values that are missing or falsy are
    printed as empty fields.
    """
    txt = c.pdf_to_text(filename)

    week = c.search(r'Liechtenstein - Woche (\d+) ', txt)

    # --- number of PCR tests: second-to-last field of the row below the heading
    tests_total = None
    heading_at = txt.find('PCR-Tests')
    if heading_at > 0:
        row_start = txt.find('\n', heading_at) + 1
        row_end = txt.find('\n', row_start)
        assert row_end > row_start
        # glue digit groups ('1 234' -> '1234'), then collapse whitespace runs
        row = re.sub(r'(\d)\s(\d)', r'\1\2', txt[row_start:row_end])
        fields = re.sub(r'\s+', r' ', row).split(' ')
        tests_total = c.txt_to_int(fields[-2])

    # --- positivity rate: last field of the labelled row
    rate = None
    label_at = txt.find('\nPositivit')
    if label_at == -1:
        label_at = txt.find('\nAnteil positive Tests')
    if label_at > 0:
        row_start = label_at + 1  # step over the leading newline
        row_end = txt.find('\n', row_start)
        assert row_end > row_start
        fields = re.sub(r'\s+', r' ', txt[row_start:row_end]).split(' ')
        rate = c.txt_to_float(fields[-1].replace('%', ''))

    print(f"{week},{tests_total or ''},{rate or ''},{filename}")
Пример #4
0
def parse_canton_data(canton, filename):
    """Print year, week, test count and positivity rate of one canton as CSV.

    The text of the weekly report PDF is obtained via ``c.pdf_to_text``; the
    per-canton tests table is cut out between a known heading and a known
    trailing marker, and the row of *canton* is read from it.  When the table
    cannot be located, both figures are printed as empty fields.

    Args:
        canton: canton abbreviation as it appears in the table (e.g. 'AG');
            it is inserted verbatim into a regular expression.
        filename: path of the PDF; echoed as the last column.
    """
    txt = c.pdf_to_text(filename)

    # The report header looks like this:
    #
    #   Coronavirus-Krankheit-2019 (COVID-19)
    #   Eidgenössisches Departement des Innern EDI
    #   Bundesamt für Gesundheit BAG
    #   Direktionsbereich Öffentliche Gesundheit
    #   Situationsbericht zur epidemiologischen Lage in der Schweiz
    #   und im Fürstentum Liechtenstein - Woche 28 (06.-12.07.2020)

    year = c.search(r'Stand:\s\d+\.\d+\.(\d{4})', txt)
    week = int(c.search(r'Liechtenstein - Woche (\d+)', txt))

    # Tests table: canton, tests of previous week then current week, ...
    #
    #   AG 5478 3588 808 529 1.3 1.8
    #   AI 96 55 595 341 0.0 0.0
    #   AR 391 249 708 451 0.5 1.2
    #   BE 6924 4652 669 449 0.4 0.9
    #   ...
    start = txt.find('Anzahl PCR-Tests in der Schweiz')
    if start == -1:
        start = txt.find('Anzahl durchgeführte PCR-Tests in der Schweiz')
    if start == -1:
        start = txt.find('Anzahl durchgeführte Tests in der Schweiz')
    if start == -1:
        start = txt.find('Anzahl gemeldeter Tests, Anzahl Tests pro')
    if start > 0:
        # The table body begins at the first ' AG ' after the heading.
        start = txt.find(r' AG ', start)
    else:
        start = 0
    end = txt.find('Tabelle 4. Durchgeführte Tests nach Kalenderwoche', start)
    if end == -1:
        end = txt.find('Die Altersverteilung der', start)
    if end == -1:
        end = txt.find('Die Anzahl durchgeführter Tests', start)
        if end >= 0:
            end -= 1
    if end == -1:
        end = txt.find('Gemeldete Tests nach Alter und Geschlecht', start)

    # Default to empty fields so the row can still be printed when the table
    # is not found (previously the names were unbound at the print below).
    number_of_tests = ''
    positivity_rate = ''
    if end > start > 0:
        tests_table = txt[start:end]
        # the numbers are sometimes separated with spaces for >1k values
        pcr = re.compile(r'(\d+)\s(\d+)')
        tests_table = pcr.sub(r'\1\2', tests_table)
        number_of_tests = c.txt_to_int(c.search(r'(\n\s+)?{}\s+\d+\s+(\d+)'.format(canton), tests_table, index=2))
        positivity_rate = c.txt_to_float(c.search(r'(\n\s+)?{}\s+.*\s([0-9]+\.[0-9]+)\n'.format(canton), tests_table, index=2))

    print(f'{year},{week},{number_of_tests},{positivity_rate},{filename}')
Пример #5
0
def parse_pcr_tot_tests(txt):
    """Return the total number of tests found in the report text *txt*.

    The running-text phrase 'insgesamt auf ... <number>.' is tried first.  If
    it yields nothing, the line following the first occurrence of 'Tests' is
    inspected for a 'Total' / 'Totale Anzahl' / 'Total durchgeführte Tests'
    row.  The result is whatever ``c.txt_to_int`` produces, or ``None`` when
    no pattern applies.
    """
    total = c.txt_to_int(
        c.search(r'insgesamt auf( .ber| rund| mehr als)? ([\d\s.]+)\.',
                 txt,
                 index=2))
    marker = txt.find('Tests')
    if total is not None or marker <= 0:
        return total

    # Take the line right below the one containing 'Tests'.
    row_start = txt.find('\n', marker) + 1
    row_end = txt.find('\n', row_start)
    # Glue digit groups together: '937 488' -> '937488'.
    row = re.sub(r'(\d)\s(\d)', r'\1\2', txt[row_start:row_end])

    # Row layout 1: the label must be at the very start of the line.
    hit = re.match(r'(Totale Anzahl|Total)\s+\+?(\d+)\s', row)
    if hit is not None:
        return c.txt_to_int(hit[2])

    # Row layout 2: total followed by a (possibly signed) second number.
    hit = re.search(r'Total durchgef.hrte Tests\s+(\d+)\s+\+?\d+\s', row)
    if hit is not None:
        return c.txt_to_int(hit[1])
    return total