Exemplo n.º 1
0
def test_clean_data_germany_goettingen_alt_is_fluke():
    """Check that cleaning 'Göttingen (alt)' removes exactly one row.

    The unfiltered German data set is fetched, passed through
    ``c.clean_data_germany_remove_goettingen_alt`` and the row counts of the
    raw and the cleaned data are compared.

    Raises:
        ValueError: if more than one row was removed, or if no row was
            removed at all.
        NotImplementedError: if the cleaned data has more rows than the raw
            data.
    """
    germany_data = c.fetch_data_germany(filter_goettingen_alt=False)

    # The filtering variant of the fetch is still called, as before, but its
    # result is not needed: the row count below is taken from the explicit
    # cleaning call on the unfiltered data.
    c.fetch_data_germany(filter_goettingen_alt=True)
    cleaned = c.clean_data_germany_remove_goettingen_alt(germany_data)

    # how many rows did we delete?
    n = len(germany_data) - len(cleaned)

    if n == 1:
        # Expected case; the original note dated this "as of 167 Sept"
        # (presumably a typo for a day in September). See
        # c.clean_data_germany_remove_goettingen_alt.__doc__ for background.
        return

    if n > 1:
        msg = f"We have found {n} rows of Göttingen alt data - please investigate\n" + \
            "if this is a real / important LK in Germany"
        raise ValueError(msg, germany_data, cleaned)

    if n == 0:
        # Nothing was removed, so the cleaning step may have become obsolete.
        msg = "There are no rows with LK Göttingen (alt). \n" + \
            "Consider removing the data cleaning code for Göttingen (alt)."
        print(msg)
        # Raise so that this situation does not go unnoticed.
        raise ValueError(msg)

    # n < 0: cleaning cannot add rows.
    raise NotImplementedError("This should not be possible.", germany_data,
                              cleaned)
Exemplo n.º 2
0
def generate_reports_germany(
    *,
    workers,
    kernel_name,
    wwwroot,
    force,
    disable_pbar,
    debug,
    incidence_period=7,
    incidence_threshold=50,
):
    """Create the HTML reports and markdown pages for the German regions.

    All arguments are keyword-only.

    Args:
        workers, kernel_name, wwwroot, force, disable_pbar: forwarded
            unchanged to ``ReportExecutor``.
        debug: forwarded to ``ReportExecutor``; when truthy, only the first
            10 regions are processed.
        incidence_period: passed as ``period`` to
            ``create_markdown_incidence_page``.
        incidence_threshold: passed as ``threshold`` to
            ``create_markdown_incidence_page``.
    """
    # Result intentionally discarded (presumably this pre-loads the data
    # before the region list is built - confirm against oscovida).
    oscovida.fetch_data_germany()

    #  TODO: The get_x_list methods should be part of Reporter class
    regions = get_germany_regions_list()

    # data cleaning: on 13 April, we had a Landkreis "LK Göttingen (alt)"
    # with only one data point. This causes plots to fail, because there
    # is nothing to plot, and then the legend() command failed.
    # We assume that the RKI labels unusual data with '(alt)', and remove those.

    def carries_alt_label(entry):
        # entry[1] is the Landkreis name of a [Bundesland, Landkreis] pair
        return "(alt)" in entry[1].lower()

    flagged = [entry for entry in regions if carries_alt_label(entry)]
    if flagged:
        logging.warning(
            f"Removing datasets label with '(alt)': {flagged}")

        # Log the size of every data set that is about to be dropped.
        for entry in flagged:
            cases, deaths = oscovida.germany_get_region(landkreis=entry[1])
            logging.warning(
                f"\tremoved: {entry} : len(cases)={len(cases)}, len(deaths)={len(deaths)}")

        # Drop the flagged entries from the list in place.
        regions[:] = [
            entry for entry in regions if not carries_alt_label(entry)
        ]

    executor = ReportExecutor(
        Reporter=GermanyReport,
        kernel_name=kernel_name,
        wwwroot=wwwroot,
        expiry_hours=2,
        attempts=3,
        workers=workers,
        force=force,
        disable_pbar=disable_pbar,
        debug=debug,
    )

    # In debug mode only a small sample of regions is processed.
    selected = regions[:10] if debug else regions

    executor.create_html_reports(selected)
    executor.create_markdown_index_page()
    executor.create_markdown_incidence_page(
        period=incidence_period,
        threshold=incidence_threshold,
    )
Exemplo n.º 3
0
def get_germany_subregion_list():
    """Return the distinct 'Landkreis' values of the German data set.

    The rows are sorted by 'Bundesland' first and 'Landkreis' second before
    duplicates are dropped, so the result follows that ordering.
    """
    sort_keys = ['Bundesland', 'Landkreis']
    frame = fetch_data_germany()
    sorted_pairs = frame[sort_keys].sort_values(sort_keys)
    unique_kreise = sorted_pairs['Landkreis'].drop_duplicates()
    return list(unique_kreise)
Exemplo n.º 4
0
def test_germany_get_population():
    """Sanity-check the table returned by ``c.germany_get_population``.

    Verifies the index name, the presence of the expected columns, that the
    index covers exactly the 'Landkreis' values of the fetched German data,
    and that selected counties exceed a minimum population.
    """
    population_table = c.germany_get_population()

    assert population_table.index.name == 'county'
    for required_column in ('population', 'cases7_per_100k'):
        assert required_column in population_table.columns

    # The population table must cover exactly the counties in the case data.
    counties_in_data = set(c.fetch_data_germany()['Landkreis'])
    assert counties_in_data == set(population_table.index)

    # Lower bounds for the population of selected counties.
    minimum_population = (
        ('SK Hamburg', 1800000),
        ('LK Pinneberg', 30000),
    )
    for county, lower_bound in minimum_population:
        assert population_table.loc[county].population > lower_bound
Exemplo n.º 5
0
def test_germany_get_population():
    """Sanity-check the table returned by ``c.germany_get_population``.

    Verifies the index name, the presence of the expected columns, that the
    index covers exactly the 'Landkreis' values of the fetched German data,
    and that selected counties exceed a minimum population.
    """
    population_table = c.germany_get_population()

    assert population_table.index.name == 'county'
    for required_column in ('population', 'cases7_per_100k'):
        assert required_column in population_table.columns

    # The population table must cover exactly the counties in the case data.
    counties_in_data = set(c.fetch_data_germany()['Landkreis'])
    assert counties_in_data == set(population_table.index)

    # Lower bounds for the population of selected counties. The last two
    # entries relate to https://github.com/oscovida/oscovida/issues/210
    minimum_population = (
        ('SK Hamburg', 1800000),
        ('LK Pinneberg', 30000),
        ('LK Saarpfalz-Kreis', 130000),
        ('StadtRegion Aachen', 500000),
    )
    for county, lower_bound in minimum_population:
        assert population_table.loc[county].population > lower_bound
Exemplo n.º 6
0
 def germany_check_subregion__is_known(subregion):
     """Verify that *subregion* occurs in the 'Landkreis' column.

     Args:
         subregion: value to look up among the distinct 'Landkreis' entries
             of ``oscovida.fetch_data_germany()``.

     Raises:
         AssertionError: if *subregion* is not among those entries.
     """
     data = oscovida.fetch_data_germany()
     known_subregions = list(data["Landkreis"].drop_duplicates())
     if subregion not in known_subregions:
         # Raised explicitly instead of using `assert`, so the check is not
         # stripped when Python runs with -O. The exception type is
         # unchanged for existing callers.
         raise AssertionError(f"{subregion} is unknown.")
Exemplo n.º 7
0
 def germany_check_region_is_known(region):
     """Verify that *region* occurs in the 'Bundesland' column.

     Args:
         region: value to look up among the distinct 'Bundesland' entries
             of ``oscovida.fetch_data_germany()``.

     Raises:
         AssertionError: if *region* is not among those entries.
     """
     data = oscovida.fetch_data_germany()
     known_regions = list(data["Bundesland"].drop_duplicates())
     if region not in known_regions:
         # Raised explicitly instead of using `assert`, so the check is not
         # stripped when Python runs with -O. The exception type is
         # unchanged for existing callers.
         raise AssertionError(f"{region} is unknown.")
Exemplo n.º 8
0
def get_germany_regions_list():
    """Return the distinct [Bundesland, Landkreis] pairs as a list of lists.

    The pairs are sorted by 'Bundesland' first and 'Landkreis' second.
    """
    key_columns = ["Bundesland", "Landkreis"]
    frame = oscovida.fetch_data_germany()
    unique_pairs = frame[key_columns].sort_values(key_columns).drop_duplicates()
    return unique_pairs.values.tolist()