def qc(country_iso3, config=None):
    """Run quality-control checks (graph and NPIs) for one country.

    :param country_iso3: ISO-3166 alpha-3 code of the country to check
    :param config: optional Config instance; a fresh one is created when None
    """
    cfg = config if config is not None else Config()
    country_params = cfg.parameters(country_iso3)
    country_dir = os.path.join(cfg.MAIN_OUTPUT_DIR, country_iso3)
    check_graph(cfg, country_params, country_iso3, country_dir)
    check_npis(cfg, country_iso3, country_dir)
def graph(country_iso3, end_date, config=None):
    """Assemble the per-admin-region graph for a country and write it out.

    Seeds the graph with mobility edges, layers on exposure, COVID cases,
    WHO data, vulnerability and the contact matrix, then serializes the
    result both as node-link JSON and as a pickle.

    :param country_iso3: ISO-3166 alpha-3 code of the country
    :param end_date: cutoff date passed through to the COVID/WHO loaders
    :param config: optional Config instance; a fresh one is created when None
    """
    cfg = config if config is not None else Config()
    params = cfg.parameters(country_iso3)
    logger.info(f"Creating graph for {country_iso3}")
    main_dir = os.path.join(cfg.MAIN_OUTPUT_DIR, country_iso3)

    # Initialize graph with mobility edges.
    mobility_csv = os.path.join(
        main_dir,
        cfg.MOBILITY_DIR,
        cfg.MOBILITY_FILENAME.format(country_iso3=country_iso3),
    )
    G = initialize_with_mobility(mobility_csv)
    G.graph["country"] = country_iso3

    # Layer on the node attributes, one data source at a time.
    G = add_exposure(G, main_dir, country_iso3, params["admin"], cfg)
    G = add_covid(G, main_dir, country_iso3, end_date, cfg)
    G = add_WHO_data(G, country_iso3, end_date, params, cfg)
    G = add_vulnerability(G, main_dir, country_iso3, cfg)
    add_contact_matrix(G, params["contact_matrix"], cfg)

    admin_dir = params["admin"]["directory"]
    input_shp = os.path.join(
        cfg.INPUT_DIR,
        country_iso3,
        cfg.SHAPEFILE_DIR,
        admin_dir,
        f"{admin_dir}.shp",
    )
    # General attributes ensure compatibility with Bucky requirements.
    G = add_general_attributes(G, country_iso3, input_shp)

    # Write out: node-link JSON plus a pickle of the graph object.
    data = nx.readwrite.json_graph.node_link_data(G)
    outdir = os.path.join(main_dir, cfg.GRAPH_OUTPUT_DIR)
    Path(outdir).mkdir(parents=True, exist_ok=True)
    outfile_json = os.path.join(outdir, cfg.GRAPH_OUTPUT_FILE_JSON.format(country_iso3))
    outfile_pickle = os.path.join(outdir, cfg.GRAPH_OUTPUT_FILE_PICKLE.format(country_iso3))
    with open(outfile_json, "w") as f:
        json.dump(data, f, indent=2)
    logger.info(f"Wrote out to {outfile_json}")
    with open(outfile_pickle, "wb") as f:
        pickle.dump(G, f)
def npis(country_iso3, update_npi_list_arg, create_final_list_arg, download_acaps_arg=False, config=None):
    """Update and/or finalize the NPI (non-pharmaceutical intervention) list.

    :param country_iso3: ISO-3166 alpha-3 code of the country
    :param update_npi_list_arg: when truthy, refresh the working NPI list
    :param create_final_list_arg: when truthy, build the final NPI list
    :param download_acaps_arg: forwarded to update_npi_list (re-download ACAPS data)
    :param config: optional Config instance; a fresh one is created when None
    """
    cfg = config if config is not None else Config()
    country_params = cfg.parameters(country_iso3)
    if update_npi_list_arg:
        update_npi_list(cfg, country_params, country_iso3, download_acaps_arg)
    if create_final_list_arg:
        create_final_list(cfg, country_params, country_iso3)
def graph(country_iso3, mobility_csv, config=None):
    """Build the country graph from a mobility matrix and write it out as JSON.

    :param country_iso3: ISO-3166 alpha-3 code of the country
    :param mobility_csv: path to the mobility-matrix CSV used to seed the edges
    :param config: optional Config instance; a fresh one is created when None
    """
    if config is None:
        config = Config()
    # FIX: every other entry point in this codebase obtains country parameters
    # by CALLING config.parameters(...); subscripting a bound method raises
    # TypeError at runtime. (If `parameters` were a dict in this module's
    # Config, revert — but the sibling `graph`/`exposure`/`covid` all call it.)
    parameters = config.parameters(country_iso3)
    logger.info(f"Creating graph for {country_iso3}")
    main_dir = os.path.join(config.MAIN_OUTPUT_DIR, country_iso3)
    # Initialize graph with mobility edges
    G = initialize_with_mobility(mobility_csv)
    G.graph["country"] = country_iso3
    # Add exposure
    G = add_exposure(G, main_dir, country_iso3, parameters["admin"], config)
    # Add COVID cases
    G = add_covid(G, main_dir, country_iso3, config)
    # Add vulnerability
    G = add_vulnerability(G, main_dir, country_iso3, config)
    # Add contact matrix
    add_contact_matrix(G, parameters["contact_matrix"], config)
    # Write out as node-link JSON
    data = nx.readwrite.json_graph.node_link_data(G)
    outdir = os.path.join(main_dir, config.GRAPH_OUTPUT_DIR)
    Path(outdir).mkdir(parents=True, exist_ok=True)
    outfile = os.path.join(main_dir, config.GRAPH_OUTPUT_DIR,
                           config.GRAPH_OUTPUT_FILE.format(country_iso3))
    with open(outfile, "w") as f:
        json.dump(data, f, indent=2)
    logger.info(f"Wrote out to {outfile}")
def mobility(country_iso3, read_in_crossings=True, read_in_distances=True, config=None):
    """Build and save the admin-to-admin mobility matrix for a country.

    Combines road/border crossings and centroid distances into a mobility
    matrix (scaled by population), then writes the matrix CSV and a
    histogram figure to the country's mobility output directory.

    :param country_iso3: ISO-3166 alpha-3 code of the country
    :param read_in_crossings: reuse the cached crossings file instead of
        recomputing road/border intersections (the expensive step)
    :param read_in_distances: reuse the cached centroid-distances file
    :param config: optional Config instance; a fresh one is created when None
    """
    # Read in the files
    logger.info(f'Running for country {country_iso3}')
    # Get config and parameters
    if config is None:
        config = Config()
    parameters = config.parameters(country_iso3)
    # Make the output directory if it doesn't exist
    output_dir = os.path.join(config.MAIN_OUTPUT_DIR, country_iso3, config.MOBILITY_DIR)
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    # Load admin regions
    df_adm = load_adm(country_iso3, config, parameters)
    # Read in population file (exposure output GeoJSON)
    df_pop = gpd.read_file(exposure.get_output_filename(country_iso3, config))
    if read_in_crossings:
        logger.info('Reading in saved roads file')
        df_roads = gpd.read_file(os.path.join(output_dir, config.CROSSINGS_FILENAME))
        # The list-valued columns were stringified on save; parse them back.
        for cname in ['crossings', 'crossing_pairs']:
            df_roads[cname] = df_roads[cname].apply(ast.literal_eval)
    else:
        # Recompute crossings from scratch: borders -> roads -> intersections.
        df_borders = get_borders(df_adm)
        df_roads = load_roads(country_iso3, config, parameters['mobility'], df_borders)
        df_roads = get_road_crossings(df_roads, df_adm)
        # GPKG can't store Python lists, so stringify list columns in a copy
        # before saving (the in-memory df_roads keeps the real lists).
        df_roads_out = df_roads.copy()
        for cname in ['crossings', 'crossing_pairs']:
            df_roads_out[cname] = df_roads_out[cname].apply(str)
        df_roads_out.to_file(os.path.join(output_dir, config.CROSSINGS_FILENAME), driver='GPKG')
    # Get centroid dist
    if read_in_distances:
        logger.info('Reading in saved distances file')
        df_dist = pd.read_csv(os.path.join(output_dir, config.DISTANCES_FILENAME))
    else:
        df_dist = get_centroid_dist(df_adm)
        df_dist.to_csv(os.path.join(output_dir, config.DISTANCES_FILENAME), index=False)
    # Count the number of crossings
    df_dist = count_crossings(df_dist, df_roads, config)
    # Create matrix and plot
    df_matrix = create_matrix(df_adm, df_dist, parameters['mobility']['scaling_factor'], df_pop)
    fig = plot_final_hist(df_matrix, country_iso3)
    # Save matrix and plot
    df_matrix.to_csv(os.path.join(output_dir, config.MOBILITY_FILENAME.format(country_iso3=country_iso3)))
    fig.savefig(os.path.join(output_dir,
                             config.MOBILITY_FIGNAME.format(country_iso3=country_iso3)), format='png')
def test_get_worldpop_data(self, monkeypatch, tmp_path):
    """get_worldpop_data should request one raster per gender/age class plus
    the two total-population rasters, with the expected file names."""
    config = Config()
    config.WORLDPOP_DIR = "TestWorldPop"
    config.AGE_CLASSES = [10, 20, 30]
    input_dir = tmp_path / "TestInputDir"
    # input_dir.mkdir()
    # Record every (url, save_path) pair instead of hitting the FTP server.
    downloads = []

    def mock_download_ftp(url, save_path):
        downloads.append((url, save_path))

    monkeypatch.setattr(utils, "download_ftp", mock_download_ftp)
    get_worldpop_data("XYZ", os.fspath(input_dir), config)
    # "+ 2" covers the two country-total rasters downloaded on top of the
    # per-gender/age ones.
    assert (len(downloads) == len(config.AGE_CLASSES) * len(config.GENDER_CLASSES) + 2)
    # NOTE(review): the zip below assumes downloads arrive age-major with
    # f/m alternating within each age, matching this list comprehension —
    # verify against get_worldpop_data's iteration order; also note zip
    # silently ignores the two trailing total-population downloads.
    files = [f"xyz_{b}_{a}_2020.tif" for a in (10, 20, 30) for b in "fm"]
    for f, (url, save) in zip(files, downloads):
        assert url.endswith(f)
        assert save.endswith(f)
def exposure(country_iso3, download_worldpop=False, config=None):
    """Build the gender/age-disaggregated population (SADD) exposure file.

    Sums WorldPop rasters over each ADM2 polygon, rescales the SADD columns
    to match the UN-adjusted totals (and optionally Country Office figures),
    adds Kochi nomads where configured, and writes the result as GeoJSON.

    :param country_iso3: ISO-3166 alpha-3 code of the country
    :param download_worldpop: when True, (re-)download the WorldPop rasters
    :param config: optional Config instance; a fresh one is created when None
    """
    # Get parameters file
    if config is None:
        config = Config()
    parameters = config.parameters(country_iso3)
    input_dir = os.path.join(config.DIR_PATH, config.INPUT_DIR, country_iso3)
    # Get input boundary shape file
    ADM2boundaries = utils.read_in_admin_boundaries(config, parameters, country_iso3)
    # Download the worldpop data
    if download_worldpop:
        get_worldpop_data(country_iso3, input_dir, config)
    # gender and age groups
    gender_age_groups = list(
        itertools.product(config.GENDER_CLASSES, config.AGE_CLASSES))
    for gender_age_group in gender_age_groups:
        gender_age_group_name = f"{gender_age_group[0]}_{gender_age_group[1]}"
        logger.info(f"analyising gender age {gender_age_group_name}")
        input_tiff_file = os.path.join(
            input_dir,
            config.WORLDPOP_DIR,
            config.WORLDPOP_FILENAMES["sadd"].format(
                country_iso3=country_iso3.lower(),
                gender=gender_age_group[0],
                age=gender_age_group[1],
            ),
        )
        # Sum the raster cells falling inside each ADM2 polygon.
        raster = rasterio.open(input_tiff_file)
        ADM2boundaries[gender_age_group_name] = (
            ADM2boundaries['geometry'].apply(
                lambda x: mask(raster, [x], crop=True, nodata=0)[0].sum()))
    # get total pops (WorldPop raw and UN-adjusted rasters)
    for pop_type, cname in zip(["pop", "unadj"], ["tot_pop_WP", "tot_pop_UN"]):
        logger.info(f"adding {pop_type}")
        input_tiff_pop = os.path.join(
            input_dir,
            config.WORLDPOP_DIR,
            config.WORLDPOP_FILENAMES[pop_type].format(
                country_iso3=country_iso3.lower()),
        )
        raster = rasterio.open(input_tiff_pop)
        ADM2boundaries[cname] = (ADM2boundaries['geometry'].apply(
            lambda x: mask(raster, [x], crop=True, nodata=0)[0].sum()))
    # total from disaggregated: rescale SADD columns so their sum matches
    # the UN-adjusted total per ADM2 region.
    logger.info("scaling SADD data to match UN Adjusted population estimates")
    gender_age_group_names = [
        "{}_{}".format(gender_age_group[0], gender_age_group[1])
        for gender_age_group in gender_age_groups
    ]
    for index, row in ADM2boundaries.iterrows():
        tot_UN = row["tot_pop_UN"]
        tot_sad = row[gender_age_group_names].sum()
        try:
            ADM2boundaries.loc[index, gender_age_group_names] *= tot_UN / tot_sad
        except ZeroDivisionError:
            # NOTE(review): if tot_sad is a numpy float (typical for a
            # pandas row sum), dividing by zero yields inf/nan with a
            # warning instead of raising — verify this branch ever fires.
            region_name = row[f'ADM2_{parameters["admin"]["language"]}']
            logger.warning(
                f"The sum across all genders and ages for admin region {region_name} is 0"
            )
    if "pop_co" in parameters:
        # NOTE(review): print() here and below — inconsistent with the
        # logger used everywhere else in this function.
        print("Further scaling SADD data to match CO estimates")
        # scaling at the ADM1 level to match figures used by Country Office instead of UN stats
        input_pop_co_filename = os.path.join(input_dir, config.CO_DIR,
                                             parameters["pop_co"]["filename"])
        df_operational_figures = pd.read_excel(input_pop_co_filename, usecols="A,D")
        df_operational_figures["Province"] = df_operational_figures[
            "Province"].replace(parameters["pop_co"]["province_names"])
        # creating dictionary mapping ADM1 name -> pcode
        ADM1_names = dict()
        for k, v in ADM2boundaries.groupby("ADM1_EN"):
            ADM1_names[k] = v.iloc[0, :].ADM1_PCODE
        df_operational_figures["ADM1_PCODE"] = df_operational_figures[
            "Province"].map(ADM1_names)
        if df_operational_figures["ADM1_PCODE"].isnull().sum() > 0:
            print(
                "missing PCODE for: ",
                df_operational_figures[
                    df_operational_figures["ADM1_PCODE"].isnull()],
            )
        # get total by ADM1
        tot_co_adm1 = df_operational_figures.groupby(
            "ADM1_PCODE").sum()["Estimated Population - 2020"]
        tot_sad_adm1 = (ADM2boundaries.groupby("ADM1_PCODE")
                        [gender_age_group_names].sum().sum(axis=1))
        for index, row in ADM2boundaries.iterrows():
            adm1_pcode = row["ADM1_PCODE"]
            pop_co = tot_co_adm1.get(adm1_pcode)
            pop_sad = tot_sad_adm1.get(adm1_pcode)
            # NOTE(review): .get() returns None for a missing pcode, which
            # would make this multiplication raise — presumably pcodes are
            # always present by this point; confirm.
            ADM2boundaries.loc[index, gender_age_group_names] *= pop_co / pop_sad
    ADM2boundaries["tot_sad"] = ADM2boundaries.loc[:, gender_age_group_names].sum(
        axis=1)
    # adding manually Kochi nomads
    if "kochi" in parameters:
        logger.info("Adding Kochi")
        ADM1_kochi = parameters["kochi"]["adm1"]
        # total population in these provinces
        pop_in_kochi_ADM1 = ADM2boundaries[ADM2boundaries["ADM1_PCODE"].isin(
            ADM1_kochi)]["tot_sad"].sum()
        for row_index, row in ADM2boundaries.iterrows():
            if row["ADM1_PCODE"] in ADM1_kochi:
                tot_kochi_in_ADM2 = 0
                for gender_age_group in gender_age_groups:
                    # population weighted share of the configured Kochi total
                    gender_age_group_name = (
                        f"{gender_age_group[0]}_{gender_age_group[1]}")
                    kochi_pp = parameters["kochi"]["total"] * (
                        row[gender_age_group_name] / pop_in_kochi_ADM1)
                    ADM2boundaries.loc[row_index, gender_age_group_name] = (
                        row[gender_age_group_name] + kochi_pp)
                    tot_kochi_in_ADM2 += kochi_pp
                ADM2boundaries.loc[row_index, "kochi"] = tot_kochi_in_ADM2
                comment = f"Added in total {tot_kochi_in_ADM2} Kochi nomads to WorldPop estimates"
                ADM2boundaries.loc[row_index, "comment"] = comment
    # Write to file, stamped with provenance metadata
    ADM2boundaries["created_at"] = str(datetime.datetime.now())
    ADM2boundaries["created_by"] = getpass.getuser()
    output_geojson = get_output_filename(country_iso3, config)
    logger.info(f"Writing to file {output_geojson}")
    utils.write_to_geojson(output_geojson, ADM2boundaries)
def vulnerability(country_iso3, download_ghs=False, config=None):
    """Build the per-ADM2 vulnerability layer and write it out as GeoJSON.

    Computes the urban population fraction from GHS SMOD/POP rasters, adds
    food insecurity, then any of the optional risk factors present in the
    country parameters (solid fuels, handwashing, raised blood pressure,
    diabetes, smoking).

    :param country_iso3: ISO-3166 alpha-3 code of the country
    :param download_ghs: when True, (re-)download the GHS raster tiles
    :param config: optional Config instance; a fresh one is created when None
    """
    # Get config file
    if config is None:
        config = Config()
    # FIX: every other entry point in this codebase obtains country parameters
    # by CALLING config.parameters(...); subscripting a bound method raises
    # TypeError at runtime. (If `parameters` were a dict here, revert — but
    # the sibling exposure/covid/mobility functions all call it.)
    parameters = config.parameters(country_iso3)
    # Get input boundary shape file
    input_dir = os.path.join(config.DIR_PATH, config.INPUT_DIR, country_iso3)
    input_shp = os.path.join(
        input_dir,
        config.SHAPEFILE_DIR,
        parameters["admin"]["directory"],
        f'{parameters["admin"]["directory"]}.shp',
    )
    boundaries = gpd.read_file(input_shp).to_crs(config.GHS_CRS)
    # Download the tiles and read them in
    if download_ghs:
        get_ghs_data("SMOD", parameters["ghs"], country_iso3, input_dir, config)
        get_ghs_data("POP", parameters["ghs"], country_iso3, input_dir, config)
    ghs_smod = rasterio.open(
        os.path.join(
            input_dir,
            config.GHS_DIR,
            config.OUTPUT_GHS["SMOD"].format(country_iso3=country_iso3),
        ))
    ghs_pop = rasterio.open(
        os.path.join(
            input_dir,
            config.GHS_DIR,
            config.OUTPUT_GHS["POP"].format(country_iso3=country_iso3),
        ))
    # adding urban/rural disaggregation data using JRC GHSL input
    logger.info("Calculating urban population fraction")
    boundaries["frac_urban"] = boundaries["geometry"].apply(
        lambda x: calc_frac_urban(x, ghs_smod, ghs_pop, config))
    # Get food insecurity
    logger.info("Getting food insecurity")
    boundaries = add_food_insecurity(
        parameters["ipc"],
        input_dir,
        boundaries,
        parameters["admin"]["language"],
        config,
    )
    # Get solid fuels
    if "solid_fuels" in parameters:
        logger.info("Getting Solid Fuels data")
        boundaries = add_factor_urban_rural(boundaries, "fossil_fuels",
                                            parameters["solid_fuels"])
    else:
        logger.info(
            f"Solid fuels data not available for country {country_iso3}")
    # Get handwashing facilities
    if "handwashing_facilities" in parameters:
        logger.info("Getting Handwashing facilities data")
        boundaries = add_factor_urban_rural(
            boundaries, "handwashing_facilities",
            parameters["handwashing_facilities"])
    else:
        logger.info(
            f"Handwashing facilities data not available for country {country_iso3}"
        )
    # Get raised blood pressure
    if "raised_blood_pressure" in parameters:
        logger.info("Getting Raised Blood Pressure data")
        add_factor_18plus(
            boundaries,
            parameters["raised_blood_pressure"],
            "raised_blood_pressure",
            country_iso3,
            config,
        )
    else:
        logger.info(
            f"Raised blood pressure data not available for country {country_iso3}"
        )
    # Get diabetes (comment fixed: was a copy-paste of "raised blood pressure")
    if "diabetes" in parameters:
        logger.info("Getting diabetes data")
        add_factor_18plus(boundaries, parameters["diabetes"], "diabetes",
                          country_iso3, config)
    else:
        logger.info(f"Diabetes data not available for country {country_iso3}")
    # Get smoking
    if "smoking" in parameters:
        logger.info("Getting smoking data")
        add_factor_18plus(boundaries, parameters["smoking"], "smoking",
                          country_iso3, config)
    else:
        logger.info(f"Smoking data not available for country {country_iso3}")
    # Write out results (back in the standard shapefile CRS)
    output_dir = os.path.join(
        config.DIR_PATH, config.vulnerability_output_dir().format(country_iso3))
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    output_geojson = os.path.join(
        output_dir,
        config.VULNERABILITY_FILENAME.format(country_iso3=country_iso3))
    logger.info(f"Saving results to {output_geojson}")
    utils.write_to_geojson(output_geojson, boundaries.to_crs(config.SHP_CRS))
def covid(country_iso3, download_covid=False, config=None):
    """Build a standardized per-ADM2, per-date COVID cases/deaths CSV.

    Reads the raw country COVID file, normalizes it to HXL-tagged columns,
    maps admin names to pcodes via the exposure file, disaggregates ADM1
    figures to ADM2 by population where needed, estimates deaths from a
    WHO-derived national CFR when the source lacks them, optionally
    accumulates daily values into cumulative ones, and writes the result.

    :param country_iso3: ISO-3166 alpha-3 code of the country
    :param download_covid: when True, (re-)download the raw COVID data
    :param config: optional Config instance; a fresh one is created when None
    """
    # Get config file
    if config is None:
        config = Config()
    parameters = config.parameters(country_iso3)
    # Get input covid file
    input_dir = os.path.join(config.DIR_PATH, config.INPUT_DIR, country_iso3)
    # Download latest covid file tiles and read them in
    if download_covid:
        get_covid_data(parameters["covid"], country_iso3, input_dir, config)
    df_covid = pd.read_csv(
        "{}/{}".format(
            os.path.join(input_dir, config.COVID_OUTPUT_DIR),
            parameters["covid"]["filename"],
        ),
        header=parameters["covid"]["header"],
        skiprows=parameters["covid"]["skiprows"],
    )
    # drop duplicates. Some datasets have duplicated rows on HDX
    df_covid = df_covid.drop_duplicates()
    # convert to standard HXL column names
    if "hlx_dict" in parameters["covid"]:
        df_covid = df_covid.rename(columns=parameters["covid"]["hlx_dict"])
    # in South Sudan we have individual case data which need to be aggregated at the ADM2 level
    if (parameters["covid"]["individual_case_data"]
            and parameters["covid"]["admin_level"] == 2):
        # One row per case -> count rows per (date, ADM1, ADM2).
        df_covid = pd.pivot_table(
            df_covid,
            index=[
                config.HLX_TAG_DATE,
                config.HLX_TAG_ADM1_NAME,
                config.HLX_TAG_ADM2_NAME,
            ],
            aggfunc="count",
        ).reset_index()
        df_covid = df_covid.rename(
            columns={"Case No.": config.HLX_TAG_TOTAL_CASES})
        df_covid = df_covid[[
            config.HLX_TAG_DATE,
            config.HLX_TAG_ADM1_NAME,
            config.HLX_TAG_ADM2_NAME,
            config.HLX_TAG_TOTAL_CASES,
        ]]
    # convert to numeric
    if parameters["covid"]["cases"]:
        df_covid[config.HLX_TAG_TOTAL_CASES] = convert_to_numeric(
            df_covid[config.HLX_TAG_TOTAL_CASES])
    if parameters["covid"]["deaths"]:
        df_covid[config.HLX_TAG_TOTAL_DEATHS] = convert_to_numeric(
            df_covid[config.HLX_TAG_TOTAL_DEATHS])
    df_covid.fillna(0, inplace=True)
    # remove "Total" rows for spatially disaggregated data
    df_covid = df_covid[df_covid[config.HLX_TAG_ADM1_NAME] != "Total"]
    # cleaning up names before using the replace dictionary
    df_covid[config.HLX_TAG_ADM1_NAME] = df_covid[
        config.HLX_TAG_ADM1_NAME].str.replace("Province", "")
    df_covid[config.HLX_TAG_ADM1_NAME] = df_covid[
        config.HLX_TAG_ADM1_NAME].str.replace("State", "")
    df_covid[config.HLX_TAG_ADM1_NAME] = df_covid[
        config.HLX_TAG_ADM1_NAME].str.strip()
    # apply replace dict to match ADM unit names in the COD with the COVID data
    if ("replace_dict" in parameters["covid"]
            and parameters["covid"]["admin_level"] == 1):
        df_covid[config.HLX_TAG_ADM1_NAME] = df_covid[
            config.HLX_TAG_ADM1_NAME].replace(
                parameters["covid"]["replace_dict"])
        # Some datasets have multiple rows corresponding to the same ADM1
        df_covid = df_covid.groupby(
            [config.HLX_TAG_ADM1_NAME, config.HLX_TAG_DATE]).sum().reset_index()
    if ("replace_dict" in parameters["covid"]
            and parameters["covid"]["admin_level"] == 2):
        df_covid[config.HLX_TAG_ADM2_NAME] = df_covid[
            config.HLX_TAG_ADM2_NAME].replace(
                parameters["covid"]["replace_dict"])
        # Some datasets have multiple rows corresponding to the same ADM2
        df_covid = df_covid.groupby(
            [config.HLX_TAG_ADM2_NAME, config.HLX_TAG_DATE]).sum().reset_index()
    # Get exposure file (needed for the name -> pcode maps and pop fractions)
    try:
        exposure_file = f"{config.SADD_output_dir().format(country_iso3)}/{config.EXPOSURE_GEOJSON.format(country_iso3)}"
        exposure_gdf = gpd.read_file(exposure_file)
    except Exception as err:
        logger.error(
            f"Cannot get exposure file for {country_iso3}, COVID file not generate"
        )
        raise err
    output_fields = [
        config.HLX_TAG_ADM1_PCODE,
        config.HLX_TAG_ADM2_PCODE,
        config.HLX_TAG_DATE,
        config.HLX_TAG_TOTAL_CASES,
        config.HLX_TAG_TOTAL_DEATHS,
    ]
    output_df_covid = pd.DataFrame(columns=output_fields)
    ADM2_ADM1_pcodes = get_dict_pcodes(exposure_gdf, "ADM2_PCODE")
    ADM0_CFR = 0
    if not parameters["covid"]["deaths"]:
        # missing death data, getting it from WHO at the national level
        who_df = get_WHO_data(config, country_iso3, hxlize=False,
                              smooth_data=parameters['WHO']['smooth_data'],
                              n_days_smoothing=parameters['WHO']['n_days_smoothing'])
        who_df['Date_reported'] = pd.to_datetime(who_df['Date_reported'])
        who_df = who_df.sort_values(by='Date_reported')
        who_df = who_df.set_index('Date_reported')
        latest_date = who_df.tail(1).index.values[0]
        # get the CFR from the latest month, to account for recent reporting rate estimation
        who_df = who_df.loc[latest_date - np.timedelta64(30, 'D'):latest_date]
        deaths = who_df.iloc[-1]['Cumulative_deaths'] - who_df.iloc[0][
            'Cumulative_deaths']
        cases = who_df.iloc[-1]['Cumulative_cases'] - who_df.iloc[0][
            'Cumulative_cases']
        ADM0_CFR = deaths / cases
        if deaths < 100 or ADM0_CFR > 0.3:
            # if deaths are below 100, use the cumulative totals to reduce noise.
            # When there are adjustments to the data we may have a jump in the
            # CFR calculated from the latest month; that case is captured by
            # the ADM0_CFR > 0.3 condition.
            deaths = who_df.iloc[-1]['Cumulative_deaths']
            cases = who_df.iloc[-1]['Cumulative_cases']
            ADM0_CFR = deaths / cases
    if parameters["covid"]["admin_level"] == 2:
        # Data is already at ADM2: just map names to pcodes.
        ADM2_names = get_dict_pcodes(exposure_gdf,
                                     parameters["covid"]["adm2_name_exp"],
                                     "ADM2_PCODE")
        df_covid[config.HLX_TAG_ADM2_PCODE] = df_covid[
            config.HLX_TAG_ADM2_NAME].map(ADM2_names)
        if df_covid[config.HLX_TAG_ADM2_PCODE].isnull().sum() > 0:
            logger.warning("missing PCODE for the following admin units ")
            logger.warning(
                df_covid[df_covid[config.HLX_TAG_ADM2_PCODE].isnull()][
                    config.HLX_TAG_ADM2_NAME].values)
            # print(df_covid)
            # Abort without writing output when any pcode is unmapped.
            return
        df_covid[config.HLX_TAG_ADM1_PCODE] = df_covid[
            config.HLX_TAG_ADM2_PCODE].map(ADM2_ADM1_pcodes)
        adm1pcode = df_covid[config.HLX_TAG_ADM1_PCODE]
        adm2pcodes = df_covid[config.HLX_TAG_ADM2_PCODE]
        date = pd.to_datetime(df_covid[config.HLX_TAG_DATE],
                              format=parameters["covid"]["date_format"])
        date = date.dt.strftime("%Y-%m-%d")
        adm2cases = (df_covid[config.HLX_TAG_TOTAL_CASES]
                     if parameters["covid"]["cases"] else None)
        adm2deaths = (df_covid[config.HLX_TAG_TOTAL_DEATHS]
                      if parameters["covid"]["deaths"] else None)
        if not parameters["covid"]["deaths"]:
            # Estimate deaths from cases via the national CFR computed above.
            adm2deaths = [cases * ADM0_CFR for cases in adm2cases]
        raw_data = {
            config.HLX_TAG_ADM1_PCODE: adm1pcode,
            config.HLX_TAG_ADM2_PCODE: adm2pcodes,
            config.HLX_TAG_DATE: date,
            config.HLX_TAG_TOTAL_CASES: adm2cases,
            config.HLX_TAG_TOTAL_DEATHS: adm2deaths,
        }
        # NOTE(review): DataFrame.append was deprecated in pandas 1.4 and
        # removed in 2.0 — migrate to pd.concat when upgrading pandas.
        output_df_covid = output_df_covid.append(pd.DataFrame(raw_data),
                                                 ignore_index=True)
    elif parameters["covid"]["admin_level"] == 1:
        if parameters["covid"].get("federal_state_dict", False):
            # for Somalia we replace the ADM1_PCODE with the name of the ADM1
            # and with the name of the state, according to the dictionary
            exposure_gdf["ADM1_PCODE"] = exposure_gdf[
                parameters["covid"]["adm1_name_exp"]].replace(
                    parameters["covid"]["federal_state_dict"])
            exposure_gdf[parameters["covid"]
                         ["adm1_name_exp"]] = exposure_gdf["ADM1_PCODE"]
        # get dictionary of ADM1 pcodes
        ADM1_names = get_dict_pcodes(exposure_gdf,
                                     parameters["covid"]["adm1_name_exp"],
                                     "ADM1_PCODE")
        # create new column with pcodes
        df_covid[config.HLX_TAG_ADM1_PCODE] = df_covid[
            config.HLX_TAG_ADM1_NAME].map(ADM1_names)
        # check if any pcode is missing (warn only; processing continues)
        if df_covid[config.HLX_TAG_ADM1_PCODE].isnull().sum() > 0:
            logger.warning("missing PCODE for the following admin units :")
            logger.warning(
                df_covid[df_covid[config.HLX_TAG_ADM1_PCODE].isnull()][
                    config.HLX_TAG_ADM1_NAME].values)
        # get the full list of gender/age combinations to calculate the sum
        # of population in adm2_pop_fractions; in principle we could use the
        # sum in the exposure but it's safer to recalculate it
        gender_age_groups = list(
            itertools.product(config.GENDER_CLASSES, config.AGE_CLASSES))
        gender_age_group_names = [
            "{}_{}".format(gender_age_group[0], gender_age_group[1])
            for gender_age_group in gender_age_groups
        ]
        # Disaggregate each ADM1 row to its ADM2s by population fraction.
        for _, row in df_covid.iterrows():
            adm2_pop_fractions = get_adm2_to_adm1_pop_frac(
                row[config.HLX_TAG_ADM1_PCODE], exposure_gdf,
                gender_age_group_names)
            adm1pcode = row[config.HLX_TAG_ADM1_PCODE]
            date = datetime.datetime.strptime(
                row[config.HLX_TAG_DATE],
                parameters["covid"]["date_format"]).strftime("%Y-%m-%d")
            adm2cases = scale_adm1_by_adm2_pop(
                parameters["covid"]["cases"],
                config.HLX_TAG_TOTAL_CASES,
                row,
                adm2_pop_fractions,
            )
            adm2deaths = scale_adm1_by_adm2_pop(
                parameters["covid"]["deaths"],
                config.HLX_TAG_TOTAL_DEATHS,
                row,
                adm2_pop_fractions,
            )
            if not parameters["covid"]["deaths"]:
                # Estimate deaths from cases via the national CFR.
                adm2deaths = [cases * ADM0_CFR for cases in adm2cases]
            adm2pcodes = [v for v in adm2_pop_fractions.keys()]
            raw_data = {
                config.HLX_TAG_ADM1_PCODE: adm1pcode,
                config.HLX_TAG_ADM2_PCODE: adm2pcodes,
                config.HLX_TAG_DATE: date,
                config.HLX_TAG_TOTAL_CASES: adm2cases,
                config.HLX_TAG_TOTAL_DEATHS: adm2deaths,
            }
            # NOTE(review): see pandas append note above — removed in 2.0.
            output_df_covid = output_df_covid.append(pd.DataFrame(raw_data),
                                                     ignore_index=True)
    else:
        logger.error(f"Missing admin_level info for COVID data")
    # cross-check: the total must match (tolerance of 10 cases)
    if (abs((output_df_covid[config.HLX_TAG_TOTAL_CASES].sum() -
             df_covid[config.HLX_TAG_TOTAL_CASES].sum())) > 10):
        logger.warning("The sum of input and output files don't match")
    if not parameters["covid"]["cumulative"]:
        logger.info(f"Calculating cumulative numbers COVID data")
        groups = [
            config.HLX_TAG_ADM1_PCODE,
            config.HLX_TAG_ADM2_PCODE,
            config.HLX_TAG_DATE,
        ]
        # TODO check this was not numeric in the case of SSD
        output_df_covid[config.HLX_TAG_TOTAL_CASES] = pd.to_numeric(
            output_df_covid[config.HLX_TAG_TOTAL_CASES])
        output_df_covid[config.HLX_TAG_TOTAL_DEATHS] = pd.to_numeric(
            output_df_covid[config.HLX_TAG_TOTAL_DEATHS])
        # get sum by day (in case multiple reports per day)
        output_df_covid = (output_df_covid.groupby(groups).sum().sort_values(
            by=config.HLX_TAG_DATE))
        # get cumsum by day (grouped by ADM2)
        output_df_covid = (output_df_covid.groupby(
            config.HLX_TAG_ADM2_PCODE).cumsum().reset_index())
    if parameters["covid"].get("federal_state_dict", False):
        # bring back the adm1 pcode that we modified to calculate the sum
        output_df_covid[config.HLX_TAG_ADM1_PCODE] = output_df_covid[
            config.HLX_TAG_ADM2_PCODE].map(ADM2_ADM1_pcodes)
    # Write to file, stamped with provenance metadata
    output_df_covid["created_at"] = str(datetime.datetime.now())
    output_df_covid["created_by"] = getpass.getuser()
    output_csv = get_output_filename(country_iso3, config)
    logger.info(f"Writing to file {output_csv}")
    output_df_covid.to_csv(output_csv, index=False)