def get_pop_objs(E, S, T, min_yr, max_yr, curr_year, GraphDiag=True): ''' This function produces the demographics objects to be used in the OG-India model package. Args: E (int): number of model periods in which agent is not economically active, >= 1 S (int): number of model periods in which agent is economically active, >= 3 T (int): number of periods to be simulated in TPI, > 2*S min_yr (int): age in years at which agents are born, >= 0 max_yr (int): age in years at which agents die with certainty, >= 4 curr_year (int): current year for which analysis will begin, >= 2016 GraphDiag (bool): =True if want graphical output and printed diagnostics Returns: omega_path_S (Numpy array), time path of the population distribution from the current state to the steady-state, size T+S x S g_n_SS (scalar): steady-state population growth rate omega_SS (Numpy array): normalized steady-state population distribution, length S surv_rates (Numpy array): survival rates that correspond to each model period of life, lenght S mort_rates (Numpy array): mortality rates that correspond to each model period of life, length S g_n_path (Numpy array): population growth rates over the time path, length T + S ''' fert_rates = get_fert(E + S, min_yr, max_yr, graph=False) mort_rates, infmort_rate = get_mort(E + S, min_yr, max_yr, graph=False) mort_rates_S = mort_rates[-S:] imm_rates_orig = get_imm_resid(E + S, min_yr, max_yr, graph=False) OMEGA_orig = np.zeros((E + S, E + S)) OMEGA_orig[0, :] = ((1 - infmort_rate) * fert_rates + np.hstack( (imm_rates_orig[0], np.zeros(E + S - 1)))) OMEGA_orig[1:, :-1] += np.diag(1 - mort_rates[:-1]) OMEGA_orig[1:, 1:] += np.diag(imm_rates_orig[1:]) # Solve for steady-state population growth rate and steady-state # population distribution by age using eigenvalue and eigenvector # decomposition eigvalues, eigvectors = np.linalg.eig(OMEGA_orig) g_n_SS = (eigvalues[np.isreal(eigvalues)].real).max() - 1 eigvec_raw =\ eigvectors[:, (eigvalues[np.isreal(eigvalues)].real).argmax()].real omega_SS_orig = eigvec_raw / eigvec_raw.sum() # Generate time path of the nonstationary population distribution omega_path_lev = np.zeros((E + S, T + S)) cur_path = os.path.split(os.path.abspath(__file__))[0] pop_file = utils.read_file( cur_path, os.path.join('data', 'demographic', 'india_pop_data.csv')) pop_data = pd.read_csv(pop_file, encoding='utf-8') pop_data_samp = pop_data[(pop_data['Age'] >= min_yr - 1) & (pop_data['Age'] <= max_yr - 1)] pop_2011 = np.array(pop_data_samp['2011'], dtype='f') # Generate the current population distribution given that E+S might # be less than max_yr-min_yr+1 age_per_EpS = np.arange(1, E + S + 1) pop_2011_EpS = pop_rebin(pop_2011, E + S) pop_2011_pct = pop_2011_EpS / pop_2011_EpS.sum() # Age most recent population data to the current year of analysis pop_curr = pop_2011_EpS.copy() data_year = 2011 pop_next = np.dot(OMEGA_orig, pop_curr) g_n_curr = ( (pop_next[-S:].sum() - pop_curr[-S:].sum()) / pop_curr[-S:].sum() ) # g_n in 2011 pop_past = pop_curr # assume 2010-2011 pop # Age the data to the current year for per in range(curr_year - data_year): pop_next = np.dot(OMEGA_orig, pop_curr) g_n_curr = ((pop_next[-S:].sum() - pop_curr[-S:].sum()) / pop_curr[-S:].sum()) pop_past = pop_curr pop_curr = pop_next # Generate time path of the population distribution omega_path_lev[:, 0] = pop_curr.copy() for per in range(1, T + S): pop_next = np.dot(OMEGA_orig, pop_curr) omega_path_lev[:, per] = pop_next.copy() pop_curr = pop_next.copy() # Force the population distribution after 1.5*S periods to be the # steady-state distribution by adjusting immigration rates, holding # constant mortality, fertility, and SS growth rates imm_tol = 1e-14 fixper = int(1.5 * S) omega_SSfx = (omega_path_lev[:, fixper] / omega_path_lev[:, fixper].sum()) imm_objs = (fert_rates, mort_rates, infmort_rate, omega_path_lev[:, fixper], g_n_SS) imm_fulloutput = opt.fsolve(immsolve, imm_rates_orig, args=(imm_objs), full_output=True, xtol=imm_tol) imm_rates_adj = imm_fulloutput[0] imm_diagdict = imm_fulloutput[1] omega_path_S = (omega_path_lev[-S:, :] / np.tile(omega_path_lev[-S:, :].sum(axis=0), (S, 1))) omega_path_S[:, fixper:] = \ np.tile(omega_path_S[:, fixper].reshape((S, 1)), (1, T + S - fixper)) g_n_path = np.zeros(T + S) g_n_path[0] = g_n_curr.copy() g_n_path[1:] = ((omega_path_lev[-S:, 1:].sum(axis=0) - omega_path_lev[-S:, :-1].sum(axis=0)) / omega_path_lev[-S:, :-1].sum(axis=0)) g_n_path[fixper + 1:] = g_n_SS omega_S_preTP = (pop_past.copy()[-S:]) / (pop_past.copy()[-S:].sum()) imm_rates_mat = np.hstack( (np.tile(np.reshape(imm_rates_orig[E:], (S, 1)), (1, fixper)), np.tile(np.reshape(imm_rates_adj[E:], (S, 1)), (1, T + S - fixper)))) if GraphDiag: # Check whether original SS population distribution is close to # the period-T population distribution omegaSSmaxdif = np.absolute(omega_SS_orig - (omega_path_lev[:, T] / omega_path_lev[:, T].sum())).max() if omegaSSmaxdif > 0.0003: print("POP. WARNING: Max. abs. dist. between original SS " + "pop. dist'n and period-T pop. dist'n is greater than" + " 0.0003. It is " + str(omegaSSmaxdif) + ".") else: print("POP. SUCCESS: orig. SS pop. dist is very close to " + "period-T pop. dist'n. The maximum absolute " + "difference is " + str(omegaSSmaxdif) + ".") # Plot the adjusted steady-state population distribution versus # the original population distribution. The difference should be # small omegaSSvTmaxdiff = np.absolute(omega_SS_orig - omega_SSfx).max() if omegaSSvTmaxdiff > 0.0003: print("POP. WARNING: The maximimum absolute difference " + "between any two corresponding points in the original" + " and adjusted steady-state population " + "distributions is" + str(omegaSSvTmaxdiff) + ", " + "which is greater than 0.0003.") else: print("POP. SUCCESS: The maximum absolute difference " + "between any two corresponding points in the original" + " and adjusted steady-state population " + "distributions is " + str(omegaSSvTmaxdiff)) fig, ax = plt.subplots() plt.plot(age_per_EpS, omega_SS_orig, label="Original Dist'n") plt.plot(age_per_EpS, omega_SSfx, label="Fixed Dist'n") # for the minor ticks, use no labels; default NullFormatter minorLocator = MultipleLocator(1) ax.xaxis.set_minor_locator(minorLocator) plt.grid(b=True, which='major', color='0.65', linestyle='-') plt.title('Original steady-state population distribution vs. fixed', fontsize=20) plt.xlabel(r'Age $s$') plt.ylabel(r"Pop. dist'n $\omega_{s}$") plt.xlim((0, E + S + 1)) plt.legend(loc='upper right') # Create directory if OUTPUT directory does not already exist ''' ---------------------------------------------------------------- output_fldr = string, path of the OUTPUT folder from cur_path output_dir = string, total path of OUTPUT folder output_path = string, path of file name of figure to be saved ---------------------------------------------------------------- ''' cur_path = os.path.split(os.path.abspath(__file__))[0] output_fldr = "OUTPUT/Demographics" output_dir = os.path.join(cur_path, output_fldr) if os.access(output_dir, os.F_OK) is False: os.makedirs(output_dir) output_path = os.path.join(output_dir, "OrigVsFixSSpop") plt.savefig(output_path) # Print whether or not the adjusted immigration rates solved the # zero condition immtol_solved = \ np.absolute(imm_diagdict['fvec'].max()) < imm_tol if immtol_solved: print("POP. SUCCESS: Adjusted immigration rates solved " + "with maximum absolute error of " + str(np.absolute(imm_diagdict['fvec'].max())) + ", which is less than the tolerance of " + str(imm_tol)) else: print("POP. WARNING: Adjusted immigration rates did not " + "solve. Maximum absolute error of " + str(np.absolute(imm_diagdict['fvec'].max())) + " is greater than the tolerance of " + str(imm_tol)) # Test whether the steady-state growth rates implied by the # adjusted OMEGA matrix equals the steady-state growth rate of # the original OMEGA matrix OMEGA2 = np.zeros((E + S, E + S)) OMEGA2[0, :] = ((1 - infmort_rate) * fert_rates + np.hstack( (imm_rates_adj[0], np.zeros(E + S - 1)))) OMEGA2[1:, :-1] += np.diag(1 - mort_rates[:-1]) OMEGA2[1:, 1:] += np.diag(imm_rates_adj[1:]) eigvalues2, eigvectors2 = np.linalg.eig(OMEGA2) g_n_SS_adj = (eigvalues[np.isreal(eigvalues2)].real).max() - 1 if np.max(np.absolute(g_n_SS_adj - g_n_SS)) > 10**(-8): print("FAILURE: The steady-state population growth rate" + " from adjusted OMEGA is different (diff is " + str(g_n_SS_adj - g_n_SS) + ") than the steady-" + "state population growth rate from the original" + " OMEGA.") elif np.max(np.absolute(g_n_SS_adj - g_n_SS)) <= 10**(-8): print("SUCCESS: The steady-state population growth rate" + " from adjusted OMEGA is close to (diff is " + str(g_n_SS_adj - g_n_SS) + ") the steady-" + "state population growth rate from the original" + " OMEGA.") # Do another test of the adjusted immigration rates. Create the # new OMEGA matrix implied by the new immigration rates. Plug in # the adjusted steady-state population distribution. Hit is with # the new OMEGA transition matrix and it should return the new # steady-state population distribution omega_new = np.dot(OMEGA2, omega_SSfx) omega_errs = np.absolute(omega_new - omega_SSfx) print("The maximum absolute difference between the adjusted " + "steady-state population distribution and the " + "distribution generated by hitting the adjusted OMEGA " + "transition matrix is " + str(omega_errs.max())) # Plot the original immigration rates versus the adjusted # immigration rates immratesmaxdiff = \ np.absolute(imm_rates_orig - imm_rates_adj).max() print("The maximum absolute distance between any two points " + "of the original immigration rates and adjusted " + "immigration rates is " + str(immratesmaxdiff)) fig, ax = plt.subplots() plt.plot(age_per_EpS, imm_rates_orig, label="Original Imm. Rates") plt.plot(age_per_EpS, imm_rates_adj, label="Adj. Imm. Rates") # for the minor ticks, use no labels; default NullFormatter minorLocator = MultipleLocator(1) ax.xaxis.set_minor_locator(minorLocator) plt.grid(b=True, which='major', color='0.65', linestyle='-') plt.title('Original immigration rates vs. adjusted', fontsize=20) plt.xlabel(r'Age $s$') plt.ylabel(r"Imm. rates $i_{s}$") plt.xlim((0, E + S + 1)) plt.legend(loc='upper center') # Create directory if OUTPUT directory does not already exist output_path = os.path.join(output_dir, "OrigVsAdjImm") plt.savefig(output_path) # Plot population distributions for data_year, curr_year, # curr_year+20, omega_SSfx, and omega_SS_orig fig, ax = plt.subplots() plt.plot(age_per_EpS, pop_2011_pct, label="2011 pop.") plt.plot(age_per_EpS, (omega_path_lev[:, 0] / omega_path_lev[:, 0].sum()), label=str(curr_year) + " pop.") plt.plot(age_per_EpS, (omega_path_lev[:, int(0.5 * S)] / omega_path_lev[:, int(0.5 * S)].sum()), label="T=" + str(int(0.5 * S)) + " pop.") plt.plot(age_per_EpS, (omega_path_lev[:, int(S)] / omega_path_lev[:, int(S)].sum()), label="T=" + str(int(S)) + " pop.") plt.plot(age_per_EpS, omega_SSfx, label="Adj. SS pop.") # for the minor ticks, use no labels; default NullFormatter minorLocator = MultipleLocator(1) ax.xaxis.set_minor_locator(minorLocator) plt.grid(b=True, which='major', color='0.65', linestyle='-') plt.title('Population distribution at points in time path', fontsize=20) plt.xlabel(r'Age $s$') plt.ylabel(r"Pop. dist'n $\omega_{s}$") plt.xlim((0, E + S + 1)) plt.legend(loc='lower left') # Create directory if OUTPUT directory does not already exist output_path = os.path.join(output_dir, "PopDistPath") plt.savefig(output_path) # return omega_path_S, g_n_SS, omega_SSfx, survival rates, # mort_rates_S, and g_n_path return (omega_path_S.T, g_n_SS, omega_SSfx[-S:] / omega_SSfx[-S:].sum(), 1 - mort_rates_S, mort_rates_S, g_n_path, imm_rates_mat.T, omega_S_preTP)
def get_fert(totpers, min_yr, max_yr, graph=True): ''' This function generates a vector of fertility rates by model period age that corresponds to the fertility rate data by age in years (Source: Office of the Registrar General & Census Commissioner: See Statement [Table] 19 of http://www.censusindia.gov.in/vital_statistics/SRS_Report_2016/ 7.Chap_3-Fertility_Indicators-2016.pdf) Args: totpers (int): total number of agent life periods (E+S), >= 3 min_yr (int): age in years at which agents are born, >= 0 max_yr (int): age in years at which agents die with certainty, >= 4 graph (bool): =True if want graphical output Returns: fert_rates (Numpy array): fertility rates for each model period of life ''' # Get current population data (2013) for weighting cur_path = os.path.split(os.path.abspath(__file__))[0] pop_file = utils.read_file( cur_path, os.path.join('data', 'demographic', 'india_pop_data.csv')) pop_data = pd.read_csv(pop_file, encoding='utf-8') pop_data_samp = pop_data[(pop_data['Age'] >= min_yr - 1) & (pop_data['Age'] <= max_yr - 1)] age_year_all = pop_data_samp['Age'] + 1 curr_pop = np.array(pop_data_samp['2011'], dtype='f') curr_pop_pct = curr_pop / curr_pop.sum() # Get fertility rate by age-bin data fert_data = np.array([ 0.0, 1.0, 3.0, 10.7, 135.4, 166.0, 91.7, 32.7, 11.3, 4.1, 1.0, 0.0 ]) / 2000 age_midp = np.array([9, 12, 15, 17, 22, 27, 32, 37, 42, 47, 52, 57]) # Generate interpolation functions for fertility rates fert_func = si.interp1d(age_midp, fert_data, kind='cubic') # Calculate average fertility rate in each age bin using trapezoid # method with a large number of points in each bin. binsize = (max_yr - min_yr + 1) / totpers num_sub_bins = float(10000) len_subbins = (np.float64(100 * num_sub_bins)) / totpers age_sub = (np.linspace( np.float64(binsize) / num_sub_bins, np.float64(max_yr), int(num_sub_bins * max_yr)) - 0.5 * np.float64(binsize) / num_sub_bins) curr_pop_sub = np.repeat( np.float64(curr_pop_pct) / num_sub_bins, num_sub_bins) fert_rates_sub = np.zeros(curr_pop_sub.shape) pred_ind = (age_sub > age_midp[0]) * (age_sub < age_midp[-1]) age_pred = age_sub[pred_ind] fert_rates_sub[pred_ind] = np.float64(fert_func(age_pred)) fert_rates = np.zeros(totpers) end_sub_bin = 0 for i in range(totpers): beg_sub_bin = int(end_sub_bin) end_sub_bin = int(np.rint((i + 1) * len_subbins)) fert_rates[i] = ((curr_pop_sub[beg_sub_bin:end_sub_bin] * fert_rates_sub[beg_sub_bin:end_sub_bin]).sum() / curr_pop_sub[beg_sub_bin:end_sub_bin].sum()) if graph: ''' ---------------------------------------------------------------- age_fine_pred = (300,) vector, equally spaced support of ages between the minimum and maximum interpolating ages fert_fine_pred = (300,) vector, interpolated fertility rates based on age_fine_pred age_fine = (300+some,) vector of ages including leading and trailing zeros fert_fine = (300+some,) vector of fertility rates including leading and trailing zeros age_mid_new = (totpers,) vector, midpoint age of each model period age bin output_fldr = string, folder in current path to save files output_dir = string, total path of OUTPUT folder output_path = string, path of file name of figure to be saved ---------------------------------------------------------------- ''' # Generate finer age vector and fertility rate vector for # graphing cubic spline interpolating function age_fine_pred = np.linspace(age_midp[0], age_midp[-1], 300) fert_fine_pred = fert_func(age_fine_pred) age_fine = np.hstack((min_yr, age_fine_pred, max_yr)) fert_fine = np.hstack((0, fert_fine_pred, 0)) age_mid_new = ( np.linspace(np.float(max_yr) / totpers, max_yr, totpers) - (0.5 * np.float(max_yr) / totpers)) fig, ax = plt.subplots() plt.scatter(age_midp[3:-2], fert_data[3:-2], s=100, c='blue', marker='o', label='Data') plt.scatter(np.append(age_midp[:3], age_midp[-2:]), np.append(fert_data[:3], fert_data[-2:]), s=100, c='green', marker='o', label='Non-Data for fitting') plt.scatter(age_mid_new, fert_rates, s=40, c='red', marker='d', label='Model period (interpolated)') plt.plot(age_fine, fert_fine, label='Cubic spline') # for the minor ticks, use no labels; default NullFormatter minorLocator = MultipleLocator(1) ax.xaxis.set_minor_locator(minorLocator) plt.grid(b=True, which='major', color='0.65', linestyle='-') # plt.title('Fitted fertility rate function by age ($f_{s}$)', # fontsize=20) plt.xlabel(r'Age $s$') plt.ylabel(r'Fertility rate $f_{s}$') plt.xlim((min_yr - 1, max_yr + 1)) plt.ylim( (-0.15 * (fert_fine_pred.max()), 1.15 * (fert_fine_pred.max()))) plt.legend(loc='upper right') plt.text(-13, -0.035, "Source: Census of India, 2016, Chapter 3, " + "Estimates of Fertility Indicators, Statement 20", fontsize=9) plt.tight_layout(rect=(0, 0.03, 1, 1)) # Create directory if OUTPUT directory does not already exist output_fldr = "OUTPUT/Demographics" output_dir = os.path.join(cur_path, output_fldr) if not os.access(output_dir, os.F_OK): os.makedirs(output_dir) output_path = os.path.join(output_dir, "fert_rates") plt.savefig(output_path) return fert_rates
def get_imm_resid(totpers, min_yr, max_yr, graph=True): ''' Calculate immigration rates by age as a residual given population levels in different periods, then output average calculated immigration rate. We have to replace the first mortality rate in this function in order to adjust the first implied immigration rate (Source: India Census, 2001 and 2011) Args: totpers (int): total number of agent life periods (E+S), >= 3 min_yr (int): age in years at which agents are born, >= 0 max_yr (int): age in years at which agents die with certainty, >= 4 graph (bool): =True if want graphical output Returns: imm_rates (Numpy array):immigration rates that correspond to each period of life, length E+S ''' cur_path = os.path.split(os.path.abspath(__file__))[0] pop_file = utils.read_file( cur_path, os.path.join('data', 'demographic', 'india_pop_data.csv')) pop_data = pd.read_csv(pop_file, encoding='utf-8') pop_data_samp = pop_data[(pop_data['Age'] >= min_yr - 1) & (pop_data['Age'] <= max_yr - 1)] age_year_all = pop_data_samp['Age'] + 1 pop_2001, pop_2011 = (np.array(pop_data_samp['2001'], dtype='f'), np.array(pop_data_samp['2011'], dtype='f')) pop_2001_EpS = pop_rebin(pop_2001, totpers) pop_2011_EpS = pop_rebin(pop_2011, totpers) # Create three years of estimated immigration rates for youngest age # individuals imm_mat = np.zeros((2, totpers)) fert_rates = get_fert(totpers, min_yr, max_yr, False) mort_rates, infmort_rate = get_mort(totpers, min_yr, max_yr, False) newbornvec = np.dot(fert_rates, pop_2001_EpS).T # imm_mat[:, 0] = ((pop_2011_EpS[0] - (1 - infmort_rate) * newbornvec) # / pop_2001_EpS[0]) imm_mat[:, 0] = 0 # Estimate immigration rates for all other-aged # individuals mort_rate10 = np.zeros_like(mort_rates[:-10]) # 10-year mort rate for i in range(10): mort_rate10 = mort_rates[i:-10 + i] + mort_rate10 mort_rate10[mort_rate10 > 1.0] = 1.0 imm_mat[:, 10:] = ((pop_2011_EpS[10:] - (1 - mort_rate10) * pop_2001_EpS[:-10]) / pop_2001_EpS[10:]) # Final estimated immigration rates are the averages over years imm_rates = imm_mat.mean(axis=0) neg_rates = imm_rates < 0 # For India, data were 10 years apart, so make annual rate imm_rates = ((1 + np.absolute(imm_rates))**(1 / 10)) - 1 imm_rates[neg_rates] = -1 * imm_rates[neg_rates] age_per = np.linspace(1, totpers, totpers) if graph: ''' ---------------------------------------------------------------- output_fldr = string, path of the OUTPUT folder from cur_path output_dir = string, total path of OUTPUT folder output_path = string, path of file name of figure to be saved ---------------------------------------------------------------- ''' fig, ax = plt.subplots() plt.scatter(age_per, imm_rates, s=40, c='red', marker='d') plt.plot(age_per, imm_rates) # for the minor ticks, use no labels; default NullFormatter minorLocator = MultipleLocator(1) ax.xaxis.set_minor_locator(minorLocator) plt.grid(b=True, which='major', color='0.65', linestyle='-') # plt.title('Fitted immigration rates by age ($i_{s}$), residual', # fontsize=20) plt.xlabel(r'Age $s$ (model periods)') plt.ylabel(r'Imm. rate $i_{s}$') plt.xlim((0, totpers + 1)) # Create directory if OUTPUT directory does not already exist output_fldr = "OUTPUT/Demographics" output_dir = os.path.join(cur_path, output_fldr) if os.access(output_dir, os.F_OK) is False: os.makedirs(output_dir) output_path = os.path.join(output_dir, "imm_rates_orig") plt.savefig(output_path) return imm_rates
def get_mort(totpers, min_yr, max_yr, graph=False): ''' This function generates a vector of mortality rates by model period age. (Source: Census of India, 2011) Args: totpers (int): total number of agent life periods (E+S), >= 3 min_yr (int): age in years at which agents are born, >= 0 max_yr (int): age in years at which agents die with certainty, >= 4 graph (bool): =True if want graphical output Returns: mort_rates (Numpy array) mortality rates that correspond to each period of life infmort_rate (scalar): infant mortality rate from 2015 U.S. CIA World Factbook ''' # Get mortality rate by age data cur_path = os.path.split(os.path.abspath(__file__))[0] # Get current population data (2011) for weighting pop_file = utils.read_file( cur_path, os.path.join('data', 'demographic', 'india_pop_data.csv')) pop_data = pd.read_csv(pop_file, encoding='utf-8') pop_data_samp = pop_data[(pop_data['Age'] >= min_yr - 1) & (pop_data['Age'] <= max_yr - 1)] age_year_all = pop_data_samp['Age'] + 1 curr_pop = np.array(pop_data_samp['2011'], dtype='f') curr_pop_pct = curr_pop / curr_pop.sum() # Get mortality rate by age data infmort_rate = 0.0482 # Get fertility rate by age-bin data mort_data = (np.array([ 2.9, 1.0, 0.7, 1.3, 1.6, 1.8, 2.3, 2.7, 4.0, 5.5, 8.3, 12.2, 20.1, 33.2, 49.9, 73.6, 104.8, 167.6 ]) / 1000) age_midp = np.array([ 2.5, 7, 12, 17, 22, 27, 32, 37, 42, 47, 52, 57, 62, 67, 72, 77, 82, 100 ]) # Generate interpolation functions for fertility rates mort_func = si.interp1d(age_midp, mort_data, kind='cubic') # Calculate average fertility rate in each age bin using trapezoid # method with a large number of points in each bin. binsize = (max_yr - min_yr + 1) / totpers num_sub_bins = float(10000) len_subbins = (np.float64(100 * num_sub_bins)) / totpers age_sub = (np.linspace( np.float64(binsize) / num_sub_bins, np.float64(max_yr), int(num_sub_bins * max_yr)) - 0.5 * np.float64(binsize) / num_sub_bins) curr_pop_sub = np.repeat( np.float64(curr_pop_pct) / num_sub_bins, num_sub_bins) mort_rates_sub = np.zeros(curr_pop_sub.shape) pred_ind = (age_sub > age_midp[0]) * (age_sub < age_midp[-1]) age_pred = age_sub[pred_ind] mort_rates_sub[pred_ind] = np.float64(mort_func(age_pred)) mort_rates = np.zeros(totpers) end_sub_bin = 0 for i in range(totpers): beg_sub_bin = int(end_sub_bin) end_sub_bin = int(np.rint((i + 1) * len_subbins)) mort_rates[i] = ((curr_pop_sub[beg_sub_bin:end_sub_bin] * mort_rates_sub[beg_sub_bin:end_sub_bin]).sum() / curr_pop_sub[beg_sub_bin:end_sub_bin].sum()) mort_rates[-1] = 1 # Mortality rate in last period is set to 1 if graph: ''' ---------------------------------------------------------------- age_mid_new = (totpers,) vector, midpoint age of each model period age bin output_fldr = string, folder in current path to save files output_dir = string, total path of OUTPUT folder output_path = string, path of file name of figure to be saved ---------------------------------------------------------------- ''' age_mid_new = ( np.linspace(np.float(max_yr) / totpers, max_yr, totpers) - (0.5 * np.float(max_yr) / totpers)) fig, ax = plt.subplots() plt.scatter(np.hstack([0, age_midp]), np.hstack([infmort_rate, mort_data]), s=100, c='blue', marker='o', label='Data') plt.scatter(np.hstack([0, age_mid_new]), np.hstack([infmort_rate, mort_rates]), s=40, c='red', marker='d', label='Model period (interpolated)') plt.axvline(x=max_yr, color='black', linestyle='-', linewidth=1) # for the minor ticks, use no labels; default NullFormatter minorLocator = MultipleLocator(1) ax.xaxis.set_minor_locator(minorLocator) plt.grid(b=True, which='major', color='0.65', linestyle='-') # plt.title('Fitted mortality rate function by age ($rho_{s}$)', # fontsize=20) plt.xlabel(r'Age $s$') plt.ylabel(r'Mortality rate $\rho_{s}$') plt.xlim((min_yr - 2, age_year_all.max() + 2)) plt.ylim((-0.05, 1.05)) plt.legend(loc='upper left') plt.text(-13, -0.30, "Source: Ministry of Health and Family " + "Welfare, Department of Health and Family Welfare", fontsize=9) plt.tight_layout(rect=(0, 0.03, 1, 1)) # Create directory if OUTPUT directory does not already exist output_fldr = "OUTPUT/Demographics" output_dir = os.path.join(cur_path, output_fldr) if os.access(output_dir, os.F_OK) is False: os.makedirs(output_dir) output_path = os.path.join(output_dir, "mort_rates") plt.savefig(output_path) return mort_rates, infmort_rate
def get_imm_resid(totpers, min_yr, max_yr, graph=True): ''' Calculate immigration rates by age as a residual given population levels in different periods, then output average calculated immigration rate. We have to replace the first mortality rate in this function in order to adjust the first implied immigration rate (Source: Population data come from Annual Estimates of the Resident Population by Single Year of Age and Sex: April 1, 2010 to July 1, 2013 (Both sexes) National Characteristics, Vintage 2013, US Census Bureau, http://www.census.gov/popest/data/national/asrh/2013/index.html) Args: totpers (int): total number of agent life periods (E+S), >= 3 min_yr (int): age in years at which agents are born, >= 0 max_yr (int): age in years at which agents die with certainty, >= 4 graph (bool): =True if want graphical output Returns: imm_rates (Numpy array):immigration rates that correspond to each period of life, length E+S ''' cur_path = os.path.split(os.path.abspath(__file__))[0] pop_file = utils.read_file(cur_path, "data/demographic/pop_data.csv") pop_data = pd.read_csv(pop_file, sep=',', thousands=',') pop_data_samp = pop_data[(pop_data['Age'] >= min_yr - 1) & (pop_data['Age'] <= max_yr - 1)] pop_2010, pop_2011, pop_2012, pop_2013 = ( np.array(pop_data_samp['2010'], dtype='f'), np.array(pop_data_samp['2011'], dtype='f'), np.array(pop_data_samp['2012'], dtype='f'), np.array(pop_data_samp['2013'], dtype='f')) pop_2010_EpS = pop_rebin(pop_2010, totpers) pop_2011_EpS = pop_rebin(pop_2011, totpers) pop_2012_EpS = pop_rebin(pop_2012, totpers) pop_2013_EpS = pop_rebin(pop_2013, totpers) # Create three years of estimated immigration rates for youngest age # individuals imm_mat = np.zeros((3, totpers)) pop11vec = np.array([pop_2010_EpS[0], pop_2011_EpS[0], pop_2012_EpS[0]]) pop21vec = np.array([pop_2011_EpS[0], pop_2012_EpS[0], pop_2013_EpS[0]]) fert_rates = get_fert(totpers, min_yr, max_yr, False) mort_rates, infmort_rate = get_mort(totpers, min_yr, max_yr, False) newbornvec = np.dot(fert_rates, np.vstack((pop_2010_EpS, pop_2011_EpS, pop_2012_EpS)).T) imm_mat[:, 0] = ((pop21vec - (1 - infmort_rate) * newbornvec) / pop11vec) # Estimate 3 years of immigration rates for all other-aged # individuals pop11mat = np.vstack((pop_2010_EpS[:-1], pop_2011_EpS[:-1], pop_2012_EpS[:-1])) pop12mat = np.vstack((pop_2010_EpS[1:], pop_2011_EpS[1:], pop_2012_EpS[1:])) pop22mat = np.vstack((pop_2011_EpS[1:], pop_2012_EpS[1:], pop_2013_EpS[1:])) mort_mat = np.tile(mort_rates[:-1], (3, 1)) imm_mat[:, 1:] = (pop22mat - (1 - mort_mat) * pop11mat) / pop12mat # Final estimated immigration rates are the averages over 3 years imm_rates = imm_mat.mean(axis=0) age_per = np.linspace(1, totpers, totpers) if graph: ''' ---------------------------------------------------------------- output_fldr = string, path of the OUTPUT folder from cur_path output_dir = string, total path of OUTPUT folder output_path = string, path of file name of figure to be saved ---------------------------------------------------------------- ''' fig, ax = plt.subplots() plt.scatter(age_per, imm_rates, s=40, c='red', marker='d') plt.plot(age_per, imm_rates) # for the minor ticks, use no labels; default NullFormatter minorLocator = MultipleLocator(1) ax.xaxis.set_minor_locator(minorLocator) plt.grid(b=True, which='major', color='0.65', linestyle='-') # plt.title('Fitted immigration rates by age ($i_{s}$), residual', # fontsize=20) plt.xlabel(r'Age $s$ (model periods)') plt.ylabel(r'Imm. rate $i_{s}$') plt.xlim((0, totpers + 1)) # Create directory if OUTPUT directory does not already exist output_fldr = "OUTPUT/Demographics" output_dir = os.path.join(cur_path, output_fldr) if os.access(output_dir, os.F_OK) is False: os.makedirs(output_dir) output_path = os.path.join(output_dir, "imm_rates_orig") plt.savefig(output_path) # plt.show() return imm_rates
def get_fert(totpers, min_yr, max_yr, graph=False): ''' This function generates a vector of fertility rates by model period age that corresponds to the fertility rate data by age in years (Source: National Vital Statistics Reports, Volume 64, Number 1, January 15, 2015, Table 3, final 2013 data http://www.cdc.gov/nchs/data/nvsr/nvsr64/nvsr64_01.pdf) Args: totpers (int): total number of agent life periods (E+S), >= 3 min_yr (int): age in years at which agents are born, >= 0 max_yr (int): age in years at which agents die with certainty, >= 4 graph (bool): =True if want graphical output Returns: fert_rates (Numpy array): fertility rates for each model period of life ''' # Get current population data (2013) for weighting cur_path = os.path.split(os.path.abspath(__file__))[0] pop_file = utils.read_file(cur_path, "data/demographic/pop_data.csv") pop_data = pd.read_csv(pop_file, sep=',', thousands=',') pop_data_samp = pop_data[(pop_data['Age'] >= min_yr - 1) & (pop_data['Age'] <= max_yr - 1)] curr_pop = np.array(pop_data_samp['2013'], dtype='f') curr_pop_pct = curr_pop / curr_pop.sum() # Get fertility rate by age-bin data fert_data = (np.array([0.0, 0.0, 0.3, 12.3, 47.1, 80.7, 105.5, 98.0, 49.3, 10.4, 0.8, 0.0, 0.0]) / 2000) # Mid points of age bins age_midp = np.array([9, 10, 12, 16, 18.5, 22, 27, 32, 37, 42, 47, 55, 56]) # Generate interpolation functions for fertility rates fert_func = si.interp1d(age_midp, fert_data, kind='cubic') # Calculate average fertility rate in each age bin using trapezoid # method with a large number of points in each bin. binsize = (max_yr - min_yr + 1) / totpers num_sub_bins = float(10000) len_subbins = (np.float64(100 * num_sub_bins)) / totpers age_sub = (np.linspace(np.float64(binsize) / num_sub_bins, np.float64(max_yr), int(num_sub_bins*max_yr)) - 0.5 * np.float64(binsize) / num_sub_bins) curr_pop_sub = np.repeat(np.float64(curr_pop_pct) / num_sub_bins, num_sub_bins) fert_rates_sub = np.zeros(curr_pop_sub.shape) pred_ind = (age_sub > age_midp[0]) * (age_sub < age_midp[-1]) age_pred = age_sub[pred_ind] fert_rates_sub[pred_ind] = np.float64(fert_func(age_pred)) fert_rates = np.zeros(totpers) end_sub_bin = 0 for i in range(totpers): beg_sub_bin = int(end_sub_bin) end_sub_bin = int(np.rint((i + 1) * len_subbins)) fert_rates[i] = (( curr_pop_sub[beg_sub_bin:end_sub_bin] * fert_rates_sub[beg_sub_bin:end_sub_bin]).sum() / curr_pop_sub[beg_sub_bin:end_sub_bin].sum()) if graph: ''' ---------------------------------------------------------------- age_fine_pred = (300,) vector, equally spaced support of ages between the minimum and maximum interpolating ages fert_fine_pred = (300,) vector, interpolated fertility rates based on age_fine_pred age_fine = (300+some,) vector of ages including leading and trailing zeros fert_fine = (300+some,) vector of fertility rates including leading and trailing zeros age_mid_new = (totpers,) vector, midpoint age of each model period age bin output_fldr = string, folder in current path to save files output_dir = string, total path of OUTPUT folder output_path = string, path of file name of figure to be saved ---------------------------------------------------------------- ''' # Generate finer age vector and fertility rate vector for # graphing cubic spline interpolating function age_fine_pred = np.linspace(age_midp[0], age_midp[-1], 300) fert_fine_pred = fert_func(age_fine_pred) age_fine = np.hstack((min_yr, age_fine_pred, max_yr)) fert_fine = np.hstack((0, fert_fine_pred, 0)) age_mid_new = (np.linspace(np.float(max_yr) / totpers, max_yr, totpers) - (0.5 * np.float(max_yr) / totpers)) fig, ax = plt.subplots() plt.scatter(age_midp, fert_data, s=70, c='blue', marker='o', label='Data') plt.scatter(age_mid_new, fert_rates, s=40, c='red', marker='d', label='Model period (integrated)') plt.plot(age_fine, fert_fine, label='Cubic spline') # for the minor ticks, use no labels; default NullFormatter minorLocator = MultipleLocator(1) ax.xaxis.set_minor_locator(minorLocator) plt.grid(b=True, which='major', color='0.65', linestyle='-') # plt.title('Fitted fertility rate function by age ($f_{s}$)', # fontsize=20) plt.xlabel(r'Age $s$') plt.ylabel(r'Fertility rate $f_{s}$') plt.xlim((min_yr - 1, max_yr + 1)) plt.ylim((-0.15 * (fert_fine_pred.max()), 1.15 * (fert_fine_pred.max()))) plt.legend(loc='upper right') plt.text(-5, -0.018, "Source: National Vital Statistics Reports, " + "Volume 64, Number 1, January 15, 2015.", fontsize=9) plt.tight_layout(rect=(0, 0.03, 1, 1)) # Create directory if OUTPUT directory does not already exist output_fldr = "OUTPUT/Demographics" output_dir = os.path.join(cur_path, output_fldr) if os.access(output_dir, os.F_OK) is False: os.makedirs(output_dir) output_path = os.path.join(output_dir, "fert_rates") plt.savefig(output_path) return fert_rates
def get_mort(totpers, min_yr, max_yr, graph=False): ''' This function generates a vector of mortality rates by model period age. (Source: Male and Female death probabilities Actuarial Life table, 2011 Social Security Administration, http://www.ssa.gov/oact/STATS/table4c6.html) Args: totpers (int): total number of agent life periods (E+S), >= 3 min_yr (int): age in years at which agents are born, >= 0 max_yr (int): age in years at which agents die with certainty, >= 4 graph (bool): =True if want graphical output Returns: mort_rates (Numpy array) mortality rates that correspond to each period of life infmort_rate (scalar): infant mortality rate from 2015 U.S. CIA World Factbook ''' # Get mortality rate by age data infmort_rate = 0.00587 # taken from 2015 U.S. infant mortality rate cur_path = os.path.split(os.path.abspath(__file__))[0] mort_file = utils.read_file( cur_path, 'data/demographic/mort_rates2011.csv') mort_data = pd.read_csv(mort_file, sep=',', thousands=',') age_year_all = mort_data['Age'] + 1 mort_rates_all = ( ((mort_data['Male Mort. Rate'] * mort_data['Num. Male Lives']) + (mort_data['Female Mort. Rate'] * mort_data['Num. Female Lives'])) / (mort_data['Num. Male Lives'] + mort_data['Num. Female Lives'])) age_year_all = age_year_all[np.isfinite(mort_rates_all)] mort_rates_all = mort_rates_all[np.isfinite(mort_rates_all)] # Calculate implied mortality rates in sub-bins of mort_rates_all. mort_rates_mxyr = mort_rates_all[0:max_yr] num_sub_bins = int(100) len_subbins = ((np.float64((max_yr - min_yr + 1) * num_sub_bins)) / totpers) mort_rates_sub = np.zeros(num_sub_bins * max_yr, dtype=float) for i in range(max_yr): mort_rates_sub[i * num_sub_bins:(i + 1) * num_sub_bins] =\ (1 - ((1 - mort_rates_mxyr[i]) ** (1.0 / num_sub_bins))) mort_rates = np.zeros(totpers) end_sub_bin = 0 for i in range(totpers): beg_sub_bin = int(end_sub_bin) end_sub_bin = int(np.rint((i + 1) * len_subbins)) mort_rates[i] = ( 1 - (1 - (mort_rates_sub[beg_sub_bin:end_sub_bin])).prod()) mort_rates[-1] = 1 # Mortality rate in last period is set to 1 if graph: ''' ---------------------------------------------------------------- age_mid_new = (totpers,) vector, midpoint age of each model period age bin output_fldr = string, folder in current path to save files output_dir = string, total path of OUTPUT folder output_path = string, path of file name of figure to be saved ---------------------------------------------------------------- ''' age_mid_new = (np.linspace(np.float(max_yr) / totpers, max_yr, totpers) - (0.5 * np.float(max_yr) / totpers)) fig, ax = plt.subplots() plt.scatter(np.hstack([0, age_year_all]), np.hstack([infmort_rate, mort_rates_all]), s=20, c='blue', marker='o', label='Data') plt.scatter(np.hstack([0, age_mid_new]), np.hstack([infmort_rate, mort_rates]), s=40, c='red', marker='d', label='Model period (cumulative)') plt.plot(np.hstack([0, age_year_all[min_yr - 1:max_yr]]), np.hstack([infmort_rate, mort_rates_all[min_yr - 1:max_yr]])) plt.axvline(x=max_yr, color='red', linestyle='-', linewidth=1) # for the minor ticks, use no labels; default NullFormatter minorLocator = MultipleLocator(1) ax.xaxis.set_minor_locator(minorLocator) plt.grid(b=True, which='major', color='0.65', linestyle='-') # plt.title('Fitted mortality rate function by age ($rho_{s}$)', # fontsize=20) plt.xlabel(r'Age $s$') plt.ylabel(r'Mortality rate $\rho_{s}$') plt.xlim((min_yr-2, age_year_all.max()+2)) plt.ylim((-0.05, 1.05)) plt.legend(loc='upper left') plt.text(-5, -0.2, "Source: Actuarial Life table, 2011 Social Security " + "Administration.", fontsize=9) plt.tight_layout(rect=(0, 0.03, 1, 1)) # Create directory if OUTPUT directory does not already exist output_fldr = "OUTPUT/Demographics" output_dir = os.path.join(cur_path, output_fldr) if os.access(output_dir, os.F_OK) is False: os.makedirs(output_dir) output_path = os.path.join(output_dir, "mort_rates") plt.savefig(output_path) # plt.show() return mort_rates, infmort_rate
def ss_profiles(base_ss, base_params, reform_ss=None, reform_params=None, by_j=True, var='nssmat', plot_data=False, plot_title=None, path=None): ''' Plot lifecycle profiles of given variable in the SS. Args: base_ss (dictionary): SS output from baseline run base_params (OG-India Specifications class): baseline parameters object reform_ss (dictionary): SS output from reform run reform_params (OG-India Specifications class): reform parameters object var (string): name of variable to plot plot_data (bool): whether to plot data values for given variable plot_title (string): title for plot path (string): path to save figure to Returns: fig (Matplotlib plot object): plot of immigration rates ''' if reform_ss is not None: assert (base_params.S == reform_params.S) assert (base_params.starting_age == reform_params.starting_age) assert (base_params.ending_age == reform_params.ending_age) age_vec = np.arange(base_params.starting_age, base_params.starting_age + base_params.S) fig1, ax1 = plt.subplots() if by_j: cm = plt.get_cmap('coolwarm') ax1.set_prop_cycle(color=[cm(1. * i / 7) for i in range(7)]) for j in range(base_params.J): plt.plot(age_vec, base_ss[var][:, j], label='Baseline, j = ' + str(j)) if reform_ss is not None: plt.plot(age_vec, reform_ss[var][:, j], label='Reform, j = ' + str(j), linestyle='--') else: base_var = (base_ss[var][:, :] * base_params.lambdas.reshape(1, base_params.J)).sum(axis=1) plt.plot(age_vec, base_var, label='Baseline') if reform_ss is not None: reform_var = ( reform_ss[var][:, :] * reform_params.lambdas.reshape(1, reform_params.J)).sum(axis=1) plt.plot(age_vec, reform_var, label='Reform', linestyle='--') if plot_data: assert var == 'nssmat' labor_file = utils.read_file( cur_path, "data/labor/cps_hours_by_age_hourspct.txt") data = pd.read_csv(labor_file, header=0, delimiter='\t') piv = data.pivot(index='age', columns='hours_pct', values='mean_hrs') lab_mat_basic = np.array(piv) lab_mat_basic /= np.nanmax(lab_mat_basic) piv2 = data.pivot(index='age', columns='hours_pct', values='num_obs') weights = np.array(piv2) weights /= np.nansum(weights, axis=1).reshape(60, 1) weighted = np.nansum((lab_mat_basic * weights), axis=1) weighted = np.append(weighted, np.zeros(20)) weighted[60:] = np.nan plt.plot(age_vec, weighted, linewidth=2.0, label='Data', linestyle=':') plt.xlabel(r'Age') plt.ylabel(VAR_LABELS[var]) plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=2) if plot_title is not None: plt.title(plot_title, fontsize=15) if path is not None: fig_path1 = os.path.join(path) plt.savefig(fig_path1, bbox_inches="tight") else: return fig1 plt.close()