def decline_50_plus(ref, past_pop):
    """Report which level-3 locations lose population between 2017 and 2100.

    The relative change ``(pop_2100 - pop_2017) / pop_2017`` is computed per
    draw for all-age (``age_group_id=22``), both-sex (``sex_id=3``)
    population, then averaged over the ``draw`` dimension to classify
    locations.

    Args:
        ref: forecast population array; must be selectable on ``year_id``,
            ``age_group_id``, ``location_id``, ``sex_id`` and ``scenario``
            (scenario 0 is used) and carry a ``draw`` dimension.
        past_pop: past population array selectable on ``year_id``,
            ``age_group_id``, ``location_id`` and ``sex_id``.

    Returns:
        The draw-level relative change (a fraction, not a percentage) for
        every level-3 location.

    Side effects:
        Prints a short summary: the number of locations whose mean change is
        below -0.5, the mean change for location ids 6 and 67 (labelled
        China and Japan), and the number of locations whose mean change lies
        strictly between -0.5 and -0.25.
    """
    loc_list = list(db.get_locations_by_level(3).location_id)

    ref_2017 = past_pop.sel(
        year_id=2017, age_group_id=22, location_id=loc_list, sex_id=3) / 1e9

    ref_2100 = ref.sel(
        year_id=2100, age_group_id=22, location_id=loc_list, sex_id=3).sel(
            scenario=0, drop=True) / 1e9

    change = (ref_2100 - ref_2017) / ref_2017
    # Average over draws once and reuse it for every summary below.
    mean_change = change.mean('draw')

    fifty_plus = mean_change.where(mean_change < -0.5).dropna("location_id")

    # Both bounds are strict, so a mean change of exactly -0.5 falls in
    # neither bucket.
    twentyfive_to_fifty = mean_change\
        .where(mean_change < -0.25).where(mean_change > -0.50)\
        .dropna("location_id")

    china = mean_change.sel(location_id=6)
    japan = mean_change.sel(location_id=67)

    # Adjacent string literals are concatenated verbatim, so each summary
    # line needs its own explicit newline.
    print(f"Number of countries decline greater than 50%: {len(fifty_plus)}\n"
          f"China percent decline: {china.values.round(2)}\n"
          f"Japan percent decline: {japan.values.round(2)}\n"
          f"Number of countries decline between 25% - 50%: "
          f"{len(twentyfive_to_fifty)}")

    return change
Пример #2
0
def super_region_pop(pop_da):
    """Print which level-1 locations (super-regions) grow from 2017 to 2100.

    Args:
        pop_da: population array with a ``quantile`` coordinate (including
            ``"mean"``) plus ``location_id``, ``year_id``, ``scenario``,
            ``sex_id`` and ``age_group_id`` coordinates. Converting it to a
            data frame must yield a ``population`` column.

    Side effects:
        Warns if any super-region's mean population peaks in 2100 or later
        (i.e. it has not peaked within the forecast window), then prints the
        2100 values for every super-region whose mean population in 2100
        exceeds its 2017 value.
    """
    locs = db.get_locations_by_level(1)[['location_id', 'location_name']]
    supreg_pop = pop_da.sel(location_id=locs.location_id.tolist())
    supreg_df = supreg_pop.sel(quantile="mean").drop(
        ["scenario", "sex_id", "age_group_id"]).to_dataframe().reset_index()

    # Keep location_id as a column so the peak rows are matched on
    # (location_id, population). Merging the bare Series would join on
    # population alone and could pick up a coincidentally equal value from
    # a different location.
    peak_vals = supreg_df.groupby(
        "location_id")["population"].max().reset_index()
    peaks = supreg_df.merge(
        peak_vals, on=["location_id", "population"]).merge(locs)
    peak_years = peaks.year_id
    if not (peak_years < 2100).all():
        warnings.warn("Not all super regions peak!")

    # Wide table with one column per year (2017, 2100) of mean population.
    supreg_17_100 = supreg_df[supreg_df.year_id.isin([2017, 2100])].drop(
        "quantile", axis=1).set_index(
            ["location_id",
             "year_id"]).unstack("year_id").droplevel(0, axis=1).reset_index()
    supreg_17_100["diff"] = supreg_17_100[2100] - supreg_17_100[2017]
    pos_diff_locs = supreg_17_100[
        supreg_17_100["diff"] > 0].location_id.tolist()
    supreg_higher100 = supreg_pop.sel(year_id=2100, location_id=pos_diff_locs)
    supreg_higher100_mean_ui = supreg_higher100.drop([
        "scenario", "sex_id", "age_group_id", "year_id"
    ]).to_dataframe().unstack().droplevel(0, axis=1).reset_index().merge(locs)

    print("SUPER-REGIONS WITH HIGHER POP IN 2100 THAN 2017 (IN 2100)")
    print(supreg_higher100_mean_ui)
    print("\nREMINDER: check plots for most severe declines\n")
Пример #3
0
def super_region_replacement(tfr_da):
    """Print a summary sentence on super-region TFR versus replacement level.

    Args:
        tfr_da: TFR array with a ``scenario`` coordinate and ``location_id``,
            ``year_id`` and ``quantile`` dimensions; the quantile values are
            expected to include ``mean``, ``lower`` and ``upper``.

    Side effects:
        Prints one paragraph covering: which level-1 locations had a mean
        TFR below 2.1 in 2017, the lowest and highest mean TFR (with
        lower/upper bounds) in 2100, and the 2017 and 2100 values for
        'Sub-Saharan Africa' together with the first year its mean TFR is
        below 2.1.

    Raises:
        IndexError: if a row needed for the sentence is missing (for
            example no 2100 data, or no 'Sub-Saharan Africa' location).
    """
    locs = db.get_locations_by_level(1)[['location_id', 'location_name']]
    tfr_df = tfr_da.drop("scenario").to_dataframe().reset_index()
    # One row per (location, year) with the quantiles spread into columns.
    tfr_wide = tfr_df.set_index(
        ["location_id", "year_id", "quantile"]).unstack().droplevel(
        level=0, axis=1).reset_index()

    # The inner merge keeps only the level-1 locations returned by `db`.
    supreg_df = tfr_wide.merge(locs, on="location_id")

    below_rep_17 = supreg_df.query("year_id==2017 & mean<2.1").round(N_DECIMALS)
    below_rep_17_names = "; ".join(below_rep_17.location_name.unique())
    num_below_17 = len(below_rep_17.location_id.unique())

    supreg_100 = supreg_df.query("year_id==2100").round(N_DECIMALS)
    supreg_100_min = supreg_100[supreg_100["mean"]==supreg_100["mean"].min()]
    supreg_100_max = supreg_100[supreg_100["mean"]==supreg_100["mean"].max()]

    subsah = supreg_df.query("location_name=='Sub-Saharan Africa'").round(
        N_DECIMALS)
    subsah_17 = subsah.query("year_id==2017")
    subsah_100 = subsah.query("year_id==2100")
    subsah_1st_yr_below_rep = subsah.query("mean<2.1").year_id.min()

    # The lower–upper ranges are written without surrounding spaces
    # throughout, including the range for the maximum super-region.
    print(
        f"{num_below_17} GBD super-regions had reached a below-replacement TFR "
        f"(<2·1) by 2017: {below_rep_17_names}. By 2100, our reference TFR "
        f"forecasts for the super-regions were between "
        f"{supreg_100_min['mean'].values[0]} (95% UI "
        f"{supreg_100_min.lower.values[0]}–{supreg_100_min.upper.values[0]}) "
        f"for {supreg_100_min.location_name.values[0]} "
        f"and {supreg_100_max['mean'].values[0]} ({supreg_100_max.lower.values[0]}–"
        f"{supreg_100_max.upper.values[0]}) for "
        f"{supreg_100_max.location_name.values[0]} "
        "(table 1). Sub-Saharan Africa is forecasted to decline from "
        f"{subsah_17['mean'].values[0]} in 2017 to {subsah_100['mean'].values[0]} "
        f"({subsah_100.lower.values[0]}–{subsah_100.upper.values[0]}) "
        f"in 2100 and reach below-replacement in {subsah_1st_yr_below_rep}.\n"
    )
Пример #4
0
from fbd_core.etl import resample
from fbd_core.etl.aggregator import Aggregator
from fbd_core.etl.transformation import expand_dimensions
from fbd_core.file_interface import FBDPath, open_xr, save_xr

import sys
sys.path.append(".")

import settings

# Identifier for the all-ages group (presumably passed as `age_group_id`).
ALL_AGE_ID = 22
# Identifier for both sexes combined (presumably passed as `sex_id`).
BOTH_SEX_ID = 3

# Number of decimal places used when rounding reported values.
N_DECIMALS = 2

# Location ids of every level-3 location. This is evaluated at import time,
# so importing the module calls `db.get_locations_by_level`.
COUNTRIES = db.get_locations_by_level(3).location_id.values.tolist()


def _helper_round(val, divide_by_this):
    """Scale ``val`` down by ``divide_by_this`` and round to N_DECIMALS."""
    scaled = val / divide_by_this
    return round(scaled, N_DECIMALS)

def _location_id_to_name(location_ids):
    """Look up the names of the given location ids.

    Names come back in the order of the location table returned by
    ``db.get_locations_by_max_level(3)``, not in the order of
    ``location_ids``; ids absent from that table are silently skipped.
    """
    loc_table = db.get_locations_by_max_level(3)[
        ['location_id', 'location_name']]
    is_requested = loc_table['location_id'].isin(location_ids)
    matching = loc_table[is_requested]
    return matching['location_name'].tolist()


# The global total fertility rate (TFR) in 2100 is forecasted to be 1.66
# (95% UI xx–xx).
Пример #5
0
def tfr_subsaharan_africa(tfr_da):
    """Print a summary sentence on 2100 TFR for sub-Saharan African countries.

    Args:
        tfr_da: TFR array with ``location_id``, ``year_id`` and ``scenario``
            coordinates and a quantile dimension whose values include
            ``mean``, ``lower`` and ``upper``.

    Side effects:
        Prints one paragraph with the 2100 values for location ids 214, 213
        and 196 (labelled Nigeria, Niger and South Africa), the countries
        whose rounded mean TFR in 2100 is above 2.1 (highest first), and the
        five countries with the lowest mean TFR in 2100.
    """
    locs = db.get_locations_by_level(3)[['location_id', 'location_name',
                                         'super_region_name']]
    subsah_countries = locs.query(
        "super_region_name=='Sub-Saharan Africa'").location_id.tolist()
    subsah_100 = tfr_da.sel(location_id=subsah_countries, year_id=2100).drop(
        ["scenario", "year_id"])

    # specific countries
    nga_100 = subsah_100.sel(location_id=214) # nigeria
    nga_100_mean, nga_100_lower, nga_100_upper = get_quantiles_and_round(
        nga_100, N_DECIMALS)

    ner_100 = subsah_100.sel(location_id=213) # niger
    ner_100_mean, ner_100_lower, ner_100_upper = get_quantiles_and_round(
        ner_100, N_DECIMALS)

    sa_100 = subsah_100.sel(location_id=196) # south africa
    sa_100_mean, sa_100_lower, sa_100_upper = get_quantiles_and_round(
        sa_100, N_DECIMALS)

    # countries above replacement in 2100, highest mean TFR first
    above_rep_100 = subsah_100.round(N_DECIMALS).to_dataframe().unstack().\
        transpose().reset_index().query("mean>2.1").merge(locs).\
        sort_values(by="mean", axis=0, ascending=False).reset_index(drop=True)

    # Only the first country spells out "with a TFR of"; the rest use the
    # compact "name (mean [lower–upper])" form.
    abvrep_strings = []
    for position, (_, country) in enumerate(above_rep_100.iterrows()):
        if position == 0:
            string = (f"{country.location_name} with a TFR of "
                      f"{country['mean']} ({country.lower}–"
                      f"{country.upper})")
        else:
            string = (f"{country.location_name} "
                      f"({country['mean']} [{country.lower}–"
                      f"{country.upper}])")
        abvrep_strings.append(string)

    abvrep_string = ", ".join(abvrep_strings) # join list to make final string

    # lowest 5 tfr countries
    lowest_5_100 = subsah_100.to_dataframe().unstack().transpose().reset_index().\
        nsmallest(5, "mean").reset_index().merge(locs).round(N_DECIMALS)
    # names ascending order
    lowest_ascending = ", ".join(lowest_5_100.location_name)
    # get range; the last row is the highest of the low group, and using -1
    # rather than 4 still works when fewer than five rows are available
    lowest_row = lowest_5_100.iloc[0]
    highest_row = lowest_5_100.iloc[-1]
    lowest_low = (f"{lowest_row['mean']} ({lowest_row.lower}–"
                  f"{lowest_row.upper})")
    highest_low = (f"{highest_row['mean']} ({highest_row.lower}–"
                   f"{highest_row.upper})")

    # NOTE(review): the 2017 estimates and "Only four countries" below are
    # hard-coded text, not derived from the data — confirm they still match
    # `abvrep_string` whenever the inputs change.
    print(
        "In 2017, estimates of TFR were 5·11, 7·08, and 2·29 in Nigeria, Niger, "
        "and South Africa, respectively; by 2100, our reference forecasts were "
        f"{nga_100_mean} (95% UI {nga_100_lower}–{nga_100_upper}), {ner_100_mean} "
        f"({ner_100_lower}–{ner_100_upper}), and {sa_100_mean} ({sa_100_lower}–"
        f"{sa_100_upper}) (table 1). Only four countries in sub-Saharan Africa "
        f"are forecasted to stay above replacement in 2100: {abvrep_string}. "
        f"Lowest TFR reference forecasts for sub-Saharan Africa in 2100 were for "
        f"{lowest_ascending}, all between {lowest_low} and {highest_low}.\n"
    )
Пример #6
0
def pop_largest_countries(pop_da):
    """Print tables used to check statements about the most populous countries.

    Args:
        pop_da: population array with ``location_id``, ``year_id``,
            ``quantile`` (including ``mean``, ``lower``, ``upper``),
            ``age_group_id``, ``sex_id`` and ``scenario`` coordinates.

    Side effects:
        Prints, in order: the five largest countries in 2100; the 2100
        population of location ids 6, 163 and 102 (China, India, USA per the
        variable names) as a percentage of each one's peak-year population;
        and the sub-Saharan African countries among the ten largest in 2017
        and in 2100, interleaved with reminder text to check against plots
        and tables.
    """
    locs_w_supreg = db.get_locations_by_level(3)[[
        'location_id', 'location_name', "super_region_name"
    ]]
    locs = locs_w_supreg.drop("super_region_name", axis=1)

    largest_5_2100 = _pop_largest_n(pop_da, 2100, 5, locs)

    print("LARGEST 5 COUNTRIES IN 2100")
    print(largest_5_2100)

    print("\nCHECK VIA PLOTS:\n"
          "'Nigeria is forecasted with continued growth through 2100 "
          "and is expected to be the 2nd most populous country by then. The "
          "reference forecasts for China and India peak well before 2050 and "
          "both countries thereafter follow steep declining trajectories...'")

    chn_ind_usa_ids = [6, 163, 102]  # China, India, USA
    chn_ind_usa_peak = xr.concat(
        [_pop_at_peak_year(pop_da, loc_id) for loc_id in chn_ind_usa_ids],
        dim="location_id")
    chn_ind_usa_100 = pop_da.sel(year_id=2100, location_id=chn_ind_usa_ids)

    pcnt_2100_to_peak_chn_ind_usa = (
        (chn_ind_usa_100 / chn_ind_usa_peak) * 100).round(N_DECIMALS).drop(
            ["year_id", "age_group_id", "sex_id",
             "scenario"]).to_dataframe().unstack("quantile").droplevel(
                 0, axis=1).reset_index().merge(locs)

    print("\nCHINA INDIA USA PCT OF PEAK IN 2100")
    print(pcnt_2100_to_peak_chn_ind_usa)

    print(
        "\nCHECK WITH TABLES BELOW:\n"
        "'The USA is projected to experience population growth until "
        "mid-century, followed by only moderate decline of less than 10% by "
        "2100. We forecast that the number of countries in sub-Saharan Africa "
        "among the countries with the 10 largest populations will increase "
        "from only Nigeria in 2017 to also include DR Congo, Ethiopia, and "
        "Niger in 2100.'")

    largest_10_2017 = _pop_subsaharan_in_top_10(pop_da, 2017, locs_w_supreg)

    print("\nSUB-SAHARAN COUNTRIES FROM 10 LARGEST COUNTRIES 2017")
    print(largest_10_2017)

    largest_10_2100 = _pop_subsaharan_in_top_10(pop_da, 2100, locs_w_supreg)

    print("\nSUB-SAHARAN COUNTRIES FROM 10 LARGEST COUNTRIES 2100")
    print(largest_10_2100)


def _pop_largest_n(pop_da, year_id, n_largest, loc_table):
    """Return the ``n_largest`` countries by mean population in ``year_id``.

    The result has one column per quantile and is merged with ``loc_table``
    to attach location metadata.
    """
    return pop_da.sel(year_id=year_id, location_id=COUNTRIES).drop(
        ["year_id", "age_group_id", "sex_id",
         "scenario"]).to_dataframe().unstack("quantile").droplevel(
             0, axis=1).reset_index().nlargest(
                 n_largest, "mean").merge(loc_table)


def _pop_subsaharan_in_top_10(pop_da, year_id, locs_w_supreg):
    """Return the sub-Saharan African rows of the ten largest countries."""
    return _pop_largest_n(pop_da, year_id, 10, locs_w_supreg).drop(
        ["lower", "upper"],
        axis=1).query("super_region_name == 'Sub-Saharan Africa'")


def _pop_at_peak_year(pop_da, location_id):
    """Return one location's population in the year its mean is highest.

    The peak year is the first year whose ``mean`` quantile equals the
    maximum; the returned slice keeps all quantiles and has its ``year_id``
    coordinate dropped.
    """
    mean_pop = pop_da.sel(location_id=location_id, quantile="mean")
    peak_year = mean_pop.where(mean_pop == mean_pop.max(),
                               drop=True).year_id.values[0]
    return pop_da.sel(location_id=location_id,
                      year_id=peak_year).drop("year_id")