def main(past_version, forecast_version, gbd_round_id, years):
    """Write one population-pyramid page per location to a single PDF.

    The prepared population/average-age data come from ``prep_pop_da``; each
    location returned by ``db.get_locations_by_max_level(3)`` is drawn with
    ``pop_plot`` and appended to ``figure_7_population_pyramids.pdf`` under
    the plot directory of ``forecast_version``.

    Args:
        past_version (str): Past population version, forwarded to
            ``prep_pop_da``.
        forecast_version (str): Forecast population version; also names the
            output directory.
        gbd_round_id (int): GBD round, forwarded to ``prep_pop_da`` and used
            in the output path.
        years (YearRange): Forwarded to ``prep_pop_da`` and ``pop_plot``.
    """
    avg_age_fhs, avg_age_sdg, avg_age_99, ds, ds_sdg, ds_99 = prep_pop_da(
        past_version, forecast_version, gbd_round_id, years)

    plot_file = FBDPath(
        f"/{gbd_round_id}/future/population/{forecast_version}",
        root_dir="plot")
    # parents=True so a brand-new version directory can be created in one go
    # (same call shape the GDP plotting entry point in this file uses).
    plot_file.mkdir(parents=True, exist_ok=True)
    pdf_file = plot_file / "figure_7_population_pyramids.pdf"

    location_metadata = db.get_locations_by_max_level(3)
    location_hierarchy = location_metadata.set_index(
        "location_id").to_xarray()["parent_id"]

    with PdfPages(pdf_file) as pdf:
        # Iterate over the raw id values rather than 0-d DataArray elements.
        for loc_id in location_hierarchy["location_id"].values:
            fig = pop_plot(avg_age_fhs, avg_age_sdg, avg_age_99,
                           ds, ds_sdg, ds_99, years, location_id=loc_id)
            pdf.savefig(fig)
            # Release the figure once it is on disk; otherwise every
            # location's figure stays registered with pyplot until exit.
            plt.close(fig)
def prep_data(gdp_df, year_list):
    """Rank locations by GDP within each year and tabulate the rankings.

    Args:
        gdp_df (pandas.DataFrame): Frame with at least ``location_id``,
            ``year_id`` and ``gdp`` columns.
        year_list (list): Years to rank. The result is ordered by the
            ``rank_2017`` column, so 2017 has to be one of them.

    Returns:
        tuple: ``(df, ranked_data)`` where

        * ``df`` has one row per location. Its ``location_id`` column holds
          the location *name* (ids are mapped to names below), followed by
          one ``rank_<year>`` column per year (NaN where the location has no
          rank that year) and a ``region`` column with the super-region
          name. Rows are sorted by ``rank_2017``.
        * ``ranked_data`` has one ``rank_<year>`` column per year holding the
          location names ordered by that year's rank.
    """
    location_metadata = db.get_locations_by_max_level(3)
    location_dict = location_metadata.set_index(
        "location_id")["location_name"].to_dict()
    region_dict = location_metadata.set_index(
        "location_name")["super_region_name"].to_dict()

    gdp = gdp_df.filter(["location_id", "year_id", "gdp"])

    # Collect the per-year rankings and concatenate once: DataFrame.append
    # re-copied the accumulated frame on every iteration and no longer
    # exists in pandas >= 2.0.
    yearly_frames = []
    for year in year_list:
        gdp_year = (gdp[gdp.year_id == year]
                    .sort_values(by=["gdp"], ascending=False)
                    .reset_index(drop=True))
        gdp_year["rank"] = gdp_year.index + 1
        yearly_frames.append(gdp_year)
    largest_by_year = pd.concat(yearly_frames)

    # From here on "location_id" carries location names, not numeric ids.
    largest_by_year["location_id"] = largest_by_year["location_id"].map(
        location_dict)

    largest_df = largest_by_year.pivot_table(
        values="gdp",
        index=["location_id", "rank"],
        columns="year_id",
    ).reset_index()

    # Each (location, rank) row only has a GDP value in the years where the
    # location held that rank; copy the rank into rank_<year> for those rows.
    for year in year_list:
        largest_df[f"rank_{year}"] = largest_df.dropna(subset=[year])["rank"]

    # Zero-fill so that summing per location collapses the rows into a single
    # rank per year; zeros are turned back into NaN afterwards.
    largest_df = largest_df.fillna(0)
    ranked_df = (largest_df.groupby("location_id")
                 .sum()
                 .filter(like="rank_")
                 .reset_index())

    df = (ranked_df.replace(0.0, np.nan)
          .sort_values(by="rank_2017")
          .reset_index(drop=True))
    df["region"] = df.location_id.map(region_dict)

    ranked_data = pd.DataFrame({
        f"rank_{year}": df.sort_values(by=f"rank_{year}")["location_id"].values
        for year in year_list
    })

    return df, ranked_data
def low_lex_countries(lex_ref_version):
    """Report the locations whose selected life expectancy is below 75.

    Args:
        lex_ref_version: Object supporting ``.sel(...)`` with ``sex_id``,
            ``scenario``, ``year_id`` and ``age_group_id`` coordinates
            (presumably an xarray.DataArray of life expectancy -- confirm
            with callers).

    Returns:
        pandas.DataFrame: Rows with ``lex < 75`` for sex 3, scenario 0, year
            2100 and age group 2, merged with ``location_name`` and
            ``region_name``. A count of rows per region is printed as a side
            effect.
    """
    lex_slice = lex_ref_version.sel(
        sex_id=3, scenario=0, year_id=2100, age_group_id=2)
    lex_table = lex_slice.rename("lex").to_dataframe().reset_index()
    below_75 = lex_table[lex_table["lex"] < 75]

    wanted_columns = ["location_id", "location_name", "region_name"]
    locations = db.get_locations_by_max_level(3)[wanted_columns]

    named_rows = below_75.merge(locations)
    # The merge must neither drop nor duplicate any low-lex row.
    assert len(named_rows) == len(below_75)

    region_counts = named_rows.region_name.value_counts()
    print_statement = f"Country regions with life expectancy less than 75:" \
        f"{region_counts}"
    print(print_statement)

    return named_rows
def _etl_total_emp():
    """Load draw-mean total employment for the level-3 location set.

    Every ``*.csv`` in the two hard-coded draw directories is read, the
    ``draw_*`` columns are averaged into ``mean_emp``, and rows are kept only
    for locations returned by ``db.get_locations_by_max_level(3)``.

    Returns:
        xarray.DataArray: ``mean_emp`` indexed by ``location_id``,
            ``year_id``, ``sex_id`` and ``age_group_id``.

    Raises:
        FileNotFoundError: If neither directory contains any CSV file.
        AssertionError: If the resulting array has non-finite cells, e.g. a
            missing location/year/sex/age combination.
    """
    # Local import keeps this block self-contained; np.isfinite replaces the
    # xr.ufuncs namespace, which recent xarray releases no longer ship.
    import numpy as np

    location_metadata = db.get_locations_by_max_level(3)
    location_ids = location_metadata["location_id"].unique()

    total_emp_dirs = [
        "/ihme/covariates/ubcov/model/output/54629/draws_temp_0/",  # male
        "/ihme/covariates/ubcov/model/output/54758/draws_temp_0/",  # female
    ]
    col_list = ["location_id", "year_id", "age_group_id", "sex_id",
                "mean_emp"]

    # Gather the per-file frames and concatenate once instead of growing a
    # DataFrame with .append (quadratic, and removed in pandas 2.0).
    frames = []
    for emp_dir in total_emp_dirs:
        for location_csv in glob.glob(emp_dir + "*.csv"):
            temp_csv = pd.read_csv(location_csv)
            temp_csv["mean_emp"] = temp_csv.filter(like="draw_").mean(axis=1)
            keep = temp_csv["location_id"].isin(location_ids)
            frames.append(temp_csv.loc[keep, col_list])

    if not frames:
        raise FileNotFoundError(
            f"No employment draw CSVs found under {total_emp_dirs}")

    total_emp = pd.concat(frames)
    total_emp_ds = total_emp.set_index(
        ["location_id", "year_id", "sex_id", "age_group_id"]
    ).to_xarray().mean_emp

    assert np.isfinite(total_emp_ds).all()

    return total_emp_ds
def get_agg(forecast_pop, da, gbd_round_id):
    """Aggregate a rate array over the location hierarchy and return counts.

    Args:
        forecast_pop (xarray.DataArray): Forecast populations handed to
            ``aggregator.Aggregator``.
        da (xarray.DataArray): Mortality or ASFR forecasts. Its
            ``attrs["metric"]`` is set to ``"rate"`` in place.
        gbd_round_id (int): GBD round used to look up the regional
            correction factors.

    Returns:
        (xarray.DataArray): The ``number`` field of the aggregation result,
            i.e. live births or deaths.
    """
    location_table = db.get_locations_by_max_level(3)
    parent_lookup = location_table.set_index(
        "location_id").to_xarray()["parent_id"]

    # Tag the input as a rate before handing it to the aggregator.
    da.attrs["metric"] = "rate"

    correction = aggregator._get_regional_correction_factors(gbd_round_id)
    pop_aggregator = aggregator.Aggregator(forecast_pop)
    aggregated = pop_aggregator.aggregate_locations(
        parent_lookup, data=da, correction_factor=correction)

    return aggregated.number
def _location_id_to_name(location_ids):
    """Return the names of the given location ids.

    Names come back in the row order of the location metadata table, not in
    the order of ``location_ids``; ids missing from the table are skipped.

    Args:
        location_ids: Collection of location ids accepted by
            ``pandas.Series.isin``.

    Returns:
        list: Matching ``location_name`` values.
    """
    location_table = db.get_locations_by_max_level(3)
    id_and_name = location_table[['location_id', 'location_name']]
    is_requested = id_and_name['location_id'].isin(location_ids)
    return id_and_name.loc[is_requested, 'location_name'].tolist()
def _location_pop_frame(dataset, location_id, age_group_ids):
    """Return one location's population as a frame with an ``age`` bin index."""
    frame = dataset.sel(
        location_id=location_id,
        age_group_id=age_group_ids,
    )["population"].to_dataframe().reset_index().sort_values("age_group_id")
    # 0-based position of each age group, used as the y coordinate.
    frame["age"] = frame["age_group_id"].factorize()[0]
    return frame


def _population_scale(max_pop):
    """Return ``(divisor, axis_label)`` for the magnitude of ``max_pop``.

    ``divisor`` is None when the values should be plotted unscaled.
    """
    if max_pop > 1e9:
        return 1e9, "Population (Billions)"
    if max_pop > 1e6:
        return 1e6, "Population (Millions)"
    if max_pop > 1e3:
        return 1e3, "Population (Thousands)"
    return None, "Population"


def _draw_pyramid_side(ax, pop_frame, avg_age, scenario_specs, sex_id,
                       location_id, n_ages, marker, with_label):
    """Draw the step outline and mean-age marker of each scenario on ``ax``."""
    for spec in scenario_specs:
        subset = pop_frame.query(
            "sex_id == {} & scenario == {} & year_id == {}".format(
                sex_id, spec["scenario"], spec["year"]))
        # Only one side of the pyramid carries legend labels.
        step_kwargs = {"label": spec["name"]} if with_label else {}
        # The extra (0, n_ages) point closes the outline at the top.
        ax.step(x=subset["population"].values.tolist() + [0],
                y=subset["age"].values.tolist() + [n_ages],
                color=spec["color"], linewidth=2, alpha=0.8, **step_kwargs)
        mean_age = avg_age.sel(location_id=location_id, sex_id=sex_id,
                               scenario=spec["scenario"],
                               year_id=spec["year"])
        # NOTE(review): "/ 5 + 0.5" presumably maps an age in years onto the
        # 5-year age-bin axis -- confirm against ALL_AGE_GROUP_IDS.
        ax.plot(0, mean_age / 5 + 0.5, marker=marker, color=spec["color"],
                markersize=20, alpha=0.8)


def pop_plot(avg_age_fhs, avg_age_sdg, avg_age_99, ds, ds_sdg, ds_99, years,
             location_id=1):
    """Draw a male/female population pyramid for one location.

    The past-end year and the reference, slower and faster scenarios at the
    forecast-end year are drawn from ``ds``; the SDG (scenario 3) and
    fastest (scenario 2) outlines come from ``ds_sdg`` and ``ds_99``. A
    triangle on each axis marks the matching average age.

    Args:
        avg_age_fhs, avg_age_sdg, avg_age_99: Average-age arrays selectable
            by ``location_id``, ``sex_id``, ``scenario`` and ``year_id``.
        ds, ds_sdg, ds_99: Datasets exposing a ``population`` variable with
            ``location_id`` and ``age_group_id`` coordinates.
        years (YearRange): Supplies ``past_end`` and ``forecast_end``.
        location_id (int): Location to plot. Defaults to 1.

    Returns:
        matplotlib.figure.Figure: A new 14 x 8 inch figure.
    """
    location_metadata = db.get_locations_by_max_level(3)
    ages = db.get_ages().query("age_group_id in @ALL_AGE_GROUP_IDS")
    n_ages = ages.shape[0]
    age_group_ids = ages["age_group_id"].values

    scenarios = [
        {"year": years.past_end, "scenario": 0,
         "name": years.past_end, "color": "black"},
        {"year": years.forecast_end, "scenario": 0,
         "name": "Reference", "color": "steelblue"},
        {"year": years.forecast_end, "scenario": -1,
         "name": "Slower Met Need and Education Pace",
         "color": "firebrick"},
        {"year": years.forecast_end, "scenario": 1,
         "name": "Faster Met Need and Education Pace",
         "color": "forestgreen"},
    ]
    alt_scenario_sdg = [
        {"year": years.forecast_end, "scenario": 3,
         "name": "SDG Met Need and Education Pace", "color": "#ff7f00"},
    ]
    alt_scenario_99 = [
        {"year": years.forecast_end, "scenario": 2,
         "name": "Fastest Met Need and Education Pace", "color": "#984ea3"},
    ]

    # Always start from a fresh figure: drawing on pyplot's implicit current
    # figure lets successive calls share (and overdraw) the same axes.
    fig = plt.figure()
    gs = plt.GridSpec(1, 2)
    ax_male = fig.add_subplot(gs[:, 0])
    ax_female = fig.add_subplot(gs[:, 1])

    pop_df = _location_pop_frame(ds, location_id, age_group_ids)
    alt_df_sdg = _location_pop_frame(ds_sdg, location_id, age_group_ids)
    alt_df_99 = _location_pop_frame(ds_99, location_id, age_group_ids)

    # Shared x-axis limit so both halves use the same scale. The female
    # maximum is taken from sex_id == 2 (it was previously read from the
    # male rows, so a larger female population could be clipped).
    male_max_pop = pop_df.loc[pop_df.sex_id == 1, "population"].max() * 1.01
    female_max_pop = pop_df.loc[pop_df.sex_id == 2, "population"].max() * 1.01
    max_pop = max([male_max_pop, female_max_pop])

    divisor, label = _population_scale(max_pop)
    if divisor is not None:
        max_pop = max_pop / divisor
        for frame in (pop_df, alt_df_sdg, alt_df_99):
            frame["population"] = frame["population"] / divisor

    layers = [
        (pop_df, avg_age_fhs, scenarios),
        (alt_df_sdg, avg_age_sdg, alt_scenario_sdg),
        (alt_df_99, avg_age_99, alt_scenario_99),
    ]

    # male plot (x axis reversed so bars grow to the left)
    for frame, avg_age, specs in layers:
        _draw_pyramid_side(ax_male, frame, avg_age, specs, sex_id=1,
                           location_id=location_id, n_ages=n_ages,
                           marker="<", with_label=True)
    ax_male.set_xlim(max_pop + .1 * max_pop, 0)
    ax_male.set_ylim(0, n_ages)
    ax_male.set_yticks(np.arange(n_ages) + 0.5)
    ax_male.set_yticklabels(ages["age_group_name_short"], fontsize=14)
    ax_male.set_title("Male", fontsize=18)
    ax_male.set_xlabel(label)
    ax_male.legend(frameon=False, loc="upper left")
    sns.despine(ax=ax_male, left=True, right=False)

    # female plot (no legend labels; the male axis carries the legend)
    for frame, avg_age, specs in layers:
        _draw_pyramid_side(ax_female, frame, avg_age, specs, sex_id=2,
                           location_id=location_id, n_ages=n_ages,
                           marker=">", with_label=False)
    ax_female.set_xlim(0, max_pop + .1 * max_pop)
    ax_female.set_ylim(0, n_ages)
    ax_female.set_yticks(np.arange(n_ages) + 0.5)
    ax_female.set_yticklabels([])
    ax_female.set_title("Female", fontsize=18)
    ax_female.set_xlabel(label)
    sns.despine(ax=ax_female)

    fig.suptitle(location_metadata.query("location_id == @location_id")
                 ["location_name"].values[0], fontsize=28)
    fig.set_size_inches(14, 8)
    fig.tight_layout()
    fig.subplots_adjust(top=.85)

    return fig
def main(gdp_version):
    """Draw the "Top 25 Nations by Total GDP" rank-change diagram as a PDF.

    GDP is loaded with ``load_data`` and ranked with ``prep_data`` over
    ``YEAR_LIST``. Four columns (2017, 2030, 2050, 2100) of 25 region-coloured
    boxes are drawn, boxes of the same country in neighbouring columns are
    joined by a line, countries entering or leaving the top 25 are listed
    below the boxes, and a super-region colour legend is added at the bottom.
    The result is written to ``table_1_2017_arrow_diagram.pdf`` in the plot
    directory of ``gdp_version``.

    Args:
        gdp_version (str): GDP version passed to ``load_data`` and used in
            the output path.
    """
    gdp = load_data(gdp_version)
    # ``data``: one row per country with rank_<year> columns;
    # ``ranked_data``: per year, country names ordered by rank (see
    # prep_data). In both, "location_id" values are location names.
    data, ranked_data = prep_data(gdp, YEAR_LIST)
    plot_dir = FBDPath(f"/{GBD_ROUND_ID}/future/gdp/{gdp_version}/",
                       root_dir='plot')
    plot_dir.mkdir(parents=True, exist_ok=True)
    location_metadata = db.get_locations_by_max_level(3)
    # location name -> super-region name, used to pick the box fill colour
    region_dict = location_metadata.set_index(
        "location_name")["super_region_name"].to_dict()
    title = 'Top 25 Nations by Total GDP'
    plot_file = plot_dir / "table_1_2017_arrow_diagram.pdf"
    c = canvas.Canvas(str(plot_file), pagesize=(792.0, 612.0))

    # text size and style
    titletextsize = 12
    headertextsize = 10
    textsize = 8
    textgap = textsize * 2.0  # vertical distance between consecutive rows

    # write title
    # NOTE(review): titley (625) is larger than the page height given in
    # pagesize (612.0), so the title is presumably drawn off the page --
    # confirm whether that is intentional.
    titley = 625
    row1 = titley - (2.0 * textgap)
    row1_and_ahalf = titley - (3.0 * textgap)
    # row2 is the baseline from which every table row is offset downwards
    row2 = row1 - (2.0 * textgap)
    c.setFont("Helvetica-Bold", titletextsize)
    c.drawString(315, titley, "{title}".format(title=title))

    # write column headers
    c.setFont("Helvetica-Bold", textsize)
    year2017_columnwidth = 100
    gap = 500  # not referenced anywhere below
    year2030_columnwidth = 100
    year2050_columnwidth = 100
    year2100_columnwidth = 100

    # left edge of each column (counting from left to right); columns are
    # separated by 80 units
    year2017_column = 70 + 3
    year2030_column = (year2017_column + year2017_columnwidth + 80)
    year2050_column = (year2030_column + year2030_columnwidth + 80)
    year2100_column = (year2050_column + year2050_columnwidth + 80)
    c.setFont("Helvetica-Bold", headertextsize)

    # name columns: each header is an empty line followed by the year
    textobject_year2017 = c.beginText(year2017_column + 40, row1_and_ahalf)
    for line in ["", f"{2017}"]:
        textobject_year2017.textLine(line)
    c.drawText(textobject_year2017)
    textobject_year2030 = c.beginText(year2030_column + 40, row1_and_ahalf)
    for line in ["", f"{2030}"]:
        textobject_year2030.textLine(line)
    c.drawText(textobject_year2030)
    textobject_year2050 = c.beginText(year2050_column + 40, row1_and_ahalf)
    for line in ["", f"{2050}"]:
        textobject_year2050.textLine(line)
    c.drawText(textobject_year2050)
    textobject_year2100 = c.beginText(year2100_column + 40, row1_and_ahalf)
    for line in ["", f"{2100}"]:
        textobject_year2100.textLine(line)
    c.drawText(textobject_year2100)

    # 1-based counter of the row being processed (incremented once per loop
    # iteration)
    total_iter = 1

    # next free text row below the top 25, tracked per column
    countryposition_2017 = 26
    countryposition_2030 = 26
    countryposition_2050 = 26
    countryposition_2100 = 26
    # next free line end-point row below the top 25, tracked per column
    lineposition_2017 = 26
    lineposition_2030 = 26
    lineposition_2050 = 26
    lineposition_2100 = 26

    for index in ranked_data['rank_2017'].unique():
        # Row of ranked_data whose 2017 entry is this country; the same row
        # also holds the countries at that position in the other years.
        row_data = ranked_data.query(
            'rank_2017 == @index').reset_index().iloc[0]
        # All four rank labels are the 1-based row position.
        rank2017 = row_data['index'] + 1
        label2017 = row_data['rank_2017']
        rank2030 = row_data['index'] + 1
        label2030 = row_data['rank_2030']
        rank2050 = row_data['index'] + 1
        label2050 = row_data['rank_2050']
        rank2100 = row_data['index'] + 1
        label2100 = row_data['rank_2100']
        c.setFont("Helvetica", textsize)

        # determine rank change: ranks of the 2017 country (this_rank holds
        # its name) in each of the four years
        this_rank = label2017
        line_start_2017 = data.query(
            "location_id == @this_rank")['rank_2017'].values[0]
        line_end_2030 = data.query(
            "location_id == @this_rank")['rank_2030'].values[0]
        line_end_2050 = data.query(
            "location_id == @this_rank")['rank_2050'].values[0]
        line_end_2100 = data.query(
            "location_id == @this_rank")['rank_2100'].values[0]

        # draw rectangles (top 25 rows only), filled with the super-region
        # colour of the country occupying that row in each year
        if total_iter < 26:
            # line style
            c.setDash(1, 0)
            # stroke colour
            c.setStrokeColorRGB(0, 0, 0)
            # 2017
            region = region_dict[label2017]
            c.setFillColorRGB(FILL[region][0], FILL[region][1],
                              FILL[region][2])
            c.rect(year2017_column, row2 - (total_iter * textgap) - 2.5,
                   year2017_columnwidth, textsize * 2.0, stroke=1, fill=1)
            # 2030
            region = region_dict[label2030]
            c.setFillColorRGB(FILL[region][0], FILL[region][1],
                              FILL[region][2])
            c.rect(year2030_column, row2 - (total_iter * textgap) - 2.5,
                   year2030_columnwidth, textsize * 2.0, stroke=1, fill=1)
            # 2050
            region = region_dict[label2050]
            c.setFillColorRGB(FILL[region][0], FILL[region][1],
                              FILL[region][2])
            c.rect(year2050_column, row2 - (total_iter * textgap) - 2.5,
                   year2050_columnwidth, textsize * 2.0, stroke=1, fill=1)
            # 2100
            region = region_dict[label2100]
            c.setFillColorRGB(FILL[region][0], FILL[region][1],
                              FILL[region][2])
            c.rect(year2100_column, row2 - (total_iter * textgap) - 2.5,
                   year2100_columnwidth, textsize * 2.0, stroke=1, fill=1)

        # draw country names: reset to opaque black for text
        c.setStrokeColorRGB(0, 0, 0)
        c.setFillColorRGB(0, 0, 0)
        c.setStrokeAlpha(1)
        c.setFillAlpha(1)

        # Countries that cross the top-25 boundary between two adjacent
        # columns get an extra label below the boxes, in the column where
        # they sit outside the top 25.
        # outside in 2017, inside in 2030 -> label under the 2017 column
        if (line_start_2017 > 25 and line_end_2030 < 26):
            c.drawString(year2017_column + 20,
                         row2 - (countryposition_2017 * textgap) + 2.5,
                         f"{int(rank2017)} {label2017}")
            countryposition_2017 += 1
        # inside in 2017, outside in 2030 -> label under the 2030 column
        if ((line_start_2017 < 26) and (line_end_2030 > 25)):
            c.drawString(year2030_column + 20,
                         row2 - (countryposition_2030 * textgap) + 2.5,
                         f"{int(line_end_2030)} {label2017}")
            countryposition_2030 += 1
        # outside in 2030, inside in 2050 -> label under the 2030 column
        if ((line_end_2030 > 25) and (line_end_2050 < 26)):
            label = data.query(
                "rank_2050 == @line_end_2050")['location_id'].values[0]
            c.drawString(year2030_column + 20,
                         row2 - (countryposition_2030 * textgap) + 2.5,
                         f"{int(line_end_2030)} {label}")
            countryposition_2030 += 1
        # inside in 2030, outside in 2050 -> label under the 2050 column
        if ((line_end_2030 < 26) and (line_end_2050 > 25)):
            label = data.query(
                "rank_2030 == @line_end_2030")['location_id'].values[0]
            c.drawString(year2050_column + 20,
                         row2 - (countryposition_2050 * textgap) + 2.5,
                         f"{int(line_end_2050)} {label}")
            countryposition_2050 += 1
        # outside in 2050, inside in 2100 -> label under the 2050 column
        if ((line_end_2050 > 25) and (line_end_2100 < 26)):
            label = data.query(
                "rank_2100 == @line_end_2100")['location_id'].values[0]
            c.drawString(year2050_column + 20,
                         row2 - (countryposition_2050 * textgap) + 2.5,
                         f"{int(line_end_2050)} {label}")
            countryposition_2050 += 1
        # inside in 2050, outside in 2100 -> label under the 2100 column
        if ((line_end_2050 < 26) and (line_end_2100 > 25)):
            label = data.query(
                "rank_2050 == @line_end_2050")['location_id'].values[0]
            c.drawString(year2100_column + 20,
                         row2 - (countryposition_2100 * textgap) + 2.5,
                         f"{int(line_end_2100)} {label}")
            countryposition_2100 += 1

        # "<rank> <country>" text inside the four boxes of a top-25 row
        if total_iter < 26:
            c.drawString(year2017_column + 20,
                         row2 - (total_iter * textgap) + 2.5,
                         f"{rank2017} {label2017}")
            c.drawString(year2030_column + 20,
                         row2 - (total_iter * textgap) + 2.5,
                         f"{rank2030} {label2030}")
            c.drawString(year2050_column + 20,
                         row2 - (total_iter * textgap) + 2.5,
                         f"{rank2050} {label2050}")
            c.drawString(year2100_column + 20,
                         row2 - (total_iter * textgap) + 2.5,
                         f"{rank2100} {label2100}")

        # determine line type and draw
        # col1-2: setDash(1, 0) when the rank number got smaller, otherwise
        # setDash(3, 1) (this includes an unchanged rank)
        c.setStrokeColorRGB(0, 0, 0)
        if line_start_2017 > line_end_2030:
            c.setDash(1, 0)
        else:
            c.setDash(3, 1)
        # Ends outside the top 25 are attached to the next free row below
        # the boxes instead of the country's actual rank row.
        if (line_start_2017 > 25) and (line_end_2030 < 26):
            c.line(year2017_column + year2017_columnwidth,
                   row2 - (lineposition_2017 * textgap) + (0.33 * textsize),
                   year2030_column,
                   row2 - (line_end_2030 * textgap) + (0.33 * textsize))
            lineposition_2017 += 1
        elif (line_start_2017 < 26) and (line_end_2030 > 25):
            c.line(year2017_column + year2017_columnwidth,
                   row2 - (line_start_2017 * textgap) + (0.33 * textsize),
                   year2030_column,
                   row2 - (lineposition_2030 * textgap) + (0.33 * textsize))
            lineposition_2030 += 1
        elif (line_start_2017 < 26) or (line_end_2030 < 26):
            c.line(year2017_column + year2017_columnwidth,
                   row2 - (line_start_2017 * textgap) + (0.33 * textsize),
                   year2030_column,
                   row2 - (line_end_2030 * textgap) + (0.33 * textsize))

        # col2-3
        if line_end_2030 > line_end_2050:
            c.setDash(1, 0)
        else:
            c.setDash(3, 1)
        if (line_end_2030 > 25) and (line_end_2050 < 26):
            c.line(year2030_column + year2030_columnwidth,
                   row2 - (lineposition_2030 * textgap) + (0.33 * textsize),
                   year2050_column,
                   row2 - (line_end_2050 * textgap) + (0.33 * textsize))
            lineposition_2030 += 1
        elif ((line_end_2030 < 26) and (line_end_2050 > 25)):
            c.line(year2030_column + year2030_columnwidth,
                   row2 - (line_end_2030 * textgap) + (0.33 * textsize),
                   year2050_column,
                   row2 - (lineposition_2050 * textgap) + (0.33 * textsize))
            lineposition_2050 += 1
        elif (line_end_2030 < 26) or (line_end_2050 < 26):
            c.line(year2030_column + year2030_columnwidth,
                   row2 - (line_end_2030 * textgap) + (0.33 * textsize),
                   year2050_column,
                   row2 - (line_end_2050 * textgap) + (0.33 * textsize))

        # col3-4
        if line_end_2050 > line_end_2100:
            c.setDash(1, 0)
        else:
            c.setDash(3, 1)
        if ((line_end_2050 > 25) and (line_end_2100 < 26)):
            c.line(year2050_column + year2050_columnwidth,
                   row2 - (lineposition_2050 * textgap) + (0.33 * textsize),
                   year2100_column,
                   row2 - (line_end_2100 * textgap) + (0.33 * textsize))
            lineposition_2050 += 1
        elif ((line_end_2050 < 26) and (line_end_2100 > 25)):
            c.line(year2050_column + year2050_columnwidth,
                   row2 - (line_end_2050 * textgap) + (0.33 * textsize),
                   year2100_column,
                   row2 - (lineposition_2100 * textgap) + (0.33 * textsize))
            lineposition_2100 += 1
        elif (line_end_2050 < 26) or (line_end_2100 < 26):
            c.line(year2050_column + year2050_columnwidth,
                   row2 - (line_end_2050 * textgap) + (0.33 * textsize),
                   year2100_column,
                   row2 - (line_end_2100 * textgap) + (0.33 * textsize))

        # iterate
        total_iter = total_iter + 1

    # Legend: one colour swatch per super-region, starting at row 31. Short
    # names sit in a narrow box at the 2017 column; long names sit in a
    # wider box that starts 60 units left of the 2030 column.
    rect_loc = 31
    region = "High-income"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])
    c.rect(year2017_column, row2 - (rect_loc * textgap) - 2.5,
           year2017_columnwidth + 20, textsize * 2.0, stroke=1, fill=1)
    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)
    c.drawString(year2017_column + 10, row2 - (rect_loc * textgap) + 2.5,
                 "High-income")

    region = "Southeast Asia, East Asia, and Oceania"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])
    c.rect(year2030_column - 60, row2 - (rect_loc * textgap) - 2.5,
           year2017_columnwidth + 90, textsize * 2.0, stroke=1, fill=1)
    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)
    c.drawString(year2030_column - 50, row2 - (rect_loc * textgap) + 2.5,
                 f"{region}")

    region = "South Asia"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])
    c.rect(year2017_column, row2 - ((rect_loc + 1) * textgap) - 2.5,
           year2017_columnwidth + 20, textsize * 2.0, stroke=1, fill=1)
    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)
    c.drawString(year2017_column + 10,
                 row2 - ((rect_loc + 1) * textgap) + 2.5, "South Asia")

    region = "Latin America and Caribbean"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])
    c.rect(year2030_column - 60, row2 - ((rect_loc + 1) * textgap) - 2.5,
           year2017_columnwidth + 90, textsize * 2.0, stroke=1, fill=1)
    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)
    c.drawString(year2030_column - 50,
                 row2 - ((rect_loc + 1) * textgap) + 2.5,
                 "Latin America and Caribbean")

    region = "Central Europe, Eastern Europe, and Central Asia"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])
    c.rect(year2030_column - 60, row2 - ((rect_loc + 2) * textgap) - 2.5,
           year2017_columnwidth + 90, textsize * 2.0, stroke=1, fill=1)
    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)
    c.drawString(year2030_column - 50,
                 row2 - ((rect_loc + 2) * textgap) + 2.5,
                 "Central Europe, Eastern Europe, and Central Asia")

    region = "North Africa and Middle East"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])
    c.rect(year2017_column, row2 - ((rect_loc + 2) * textgap) - 2.5,
           year2017_columnwidth + 20, textsize * 2.0, stroke=1, fill=1)
    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)
    c.drawString(year2017_column + 10,
                 row2 - ((rect_loc + 2) * textgap) + 2.5,
                 "North Africa and Middle East")

    region = "Sub-Saharan Africa"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])
    c.rect(year2017_column, row2 - ((rect_loc + 3) * textgap) - 2.5,
           year2017_columnwidth + 20, textsize * 2.0, stroke=1, fill=1)
    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)
    c.drawString(year2017_column + 10,
                 row2 - ((rect_loc + 3) * textgap) + 2.5,
                 "Sub-Saharan Africa")

    # Write the finished page to plot_file.
    c.save()
def _export_mean_and_bounds(da, out_dir, stem):
    """Write the draw mean and the 2.5%/97.5% draw quantiles of ``da`` as
    ``<stem>_mean.csv``, ``<stem>_lower.csv`` and ``<stem>_upper.csv``."""
    da.mean("draw").to_dataframe().reset_index().\
        to_csv(out_dir / f"{stem}_mean.csv", index=False)
    quantiles = da.quantile([0.025, 0.975], "draw")
    for level, suffix in ((0.025, "lower"), (0.975, "upper")):
        quantiles.sel(quantile=level).to_dataframe().reset_index().\
            to_csv(out_dir / f"{stem}_{suffix}.csv", index=False)


def make_tfr_and_agg(asfr_version, pop_version, gbd_round_id, years, model,
                     hyperparam, **kwargs):
    """
    From asfr and pop, make asfr_agg, tfr, and tfr_agg, and export files
    for pipeline and plotting needs.

    Only forecast years are used. Saved outputs:

    * ``asfr_agg_based_on_preliminary_pop.nc`` in the asfr version directory;
    * ``tfr.nc`` and ``tfr_agg_based_on_preliminary_pop.nc`` in the tfr
      directory (versioned with ``asfr_version``);
    * mean / lower (2.5%) / upper (97.5%) CSV summaries over draws for asfr,
      asfr_agg, tfr and tfr_agg.

    Args:
        asfr_version (str): intercept-shifted asfr version where an
            "asfr.nc" with both past and future is present.
        pop_version (str): future pop version to use for agg.
        gbd_round_id (int): gbd round id.
        years (YearRange): past_start:forecast_start:forecast_end.
        model: Not used by this function.
        hyperparam: Not used by this function.
        **kwargs: Accepted and ignored.
    """
    pop_fbd_path = FBDPath(gbd_round_id=gbd_round_id,
                           past_or_future="future",
                           stage="population",
                           version=pop_version)
    # only need females for fertility studies
    pop = open_xr(pop_fbd_path / "population.nc").data.\
        sel(sex_id=2, year_id=years.forecast_years)

    agg = Aggregator(pop)
    locs = db.get_locations_by_max_level(3)
    hierarchy = locs[["location_id", "parent_id"]].\
        set_index("location_id").to_xarray().parent_id

    asfr_fbd_path = FBDPath(gbd_round_id=gbd_round_id,
                            past_or_future="future",
                            stage="asfr",
                            version=asfr_version)
    asfr = open_xr(asfr_fbd_path / "asfr.nc").data.\
        sel(year_id=years.forecast_years)

    asfr_agg = agg.aggregate_locations(hierarchy, data=asfr).rate

    # Calculate TFR
    tfr = calc_tfr_from_asfr(asfr)
    tfr_agg = calc_tfr_from_asfr(asfr_agg)

    # Saving to .nc files
    asfr.name = "value"
    tfr.name = "value"
    asfr_agg.name = "value"
    tfr_agg.name = "value"

    LOGGER.info("saving asfr_agg, tfr, tfr_agg to .nc")
    save_xr(asfr_agg,
            asfr_fbd_path / "asfr_agg_based_on_preliminary_pop.nc",
            metric="rate", space="identity",
            asfr_version=asfr_version, pop_version=pop_version)

    tfr_fbd_path = FBDPath(gbd_round_id=gbd_round_id,
                           past_or_future="future",
                           stage="tfr",
                           version=asfr_version)
    save_xr(tfr, tfr_fbd_path / "tfr.nc", metric="rate", space="identity",
            asfr_version=asfr_version)
    save_xr(tfr_agg,
            tfr_fbd_path / "tfr_agg_based_on_preliminary_pop.nc",
            metric="rate", space="identity",
            asfr_version=asfr_version, pop_version=pop_version)

    # Logged (rather than printed) like the message above.
    LOGGER.info("Saving Quantiles and Means to .csv")
    _export_mean_and_bounds(asfr, asfr_fbd_path, "asfr")
    _export_mean_and_bounds(asfr_agg, asfr_fbd_path,
                            "asfr_agg_based_on_preliminary_pop")
    _export_mean_and_bounds(tfr, tfr_fbd_path, "tfr")
    _export_mean_and_bounds(tfr_agg, tfr_fbd_path,
                            "tfr_agg_based_on_preliminary_pop")
def _add_location_name(df):
    """Attach a ``location_name`` column to ``df``.

    Args:
        df (pandas.DataFrame): Frame with a ``location_id`` column.

    Returns:
        pandas.DataFrame: ``df`` left-joined on ``location_id`` with the
            location metadata names; rows without a match keep a missing
            name.
    """
    name_lookup = db.get_locations_by_max_level(3)[
        ['location_id', 'location_name']]
    return df.merge(name_lookup, how='left', on='location_id')