def test_plot_income_data_save_fig(tmpdir):
    '''
    Call plot_income_data with an output directory and check that the
    three expected PNG files can be read back from that directory as
    Numpy arrays.
    '''
    # Inputs: 80 age midpoints (20.5, ..., 99.5), the 7 ability-group
    # midpoints and population shares, and a uniform age distribution
    age_midpoints = np.linspace(20 + 0.5, 100 - 0.5, 80)
    ability_midpoints = np.array(
        [0.125, 0.375, 0.6, 0.75, 0.85, 0.945, 0.995])
    ability_shares = np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09, 0.01])
    uniform_age_wgts = np.ones(80) / 80
    e_matrix = income.get_e_orig(uniform_age_wgts, ability_shares)

    parameter_plots.plot_income_data(
        age_midpoints, ability_midpoints, ability_shares, e_matrix,
        output_dir=tmpdir)

    # Read every expected file first (a missing file raises here), then
    # check the type of each loaded image
    expected_files = (
        'ability_3D_lev.png',
        'ability_3D_log.png',
        'ability_2D_log.png',
    )
    images = [
        mpimg.imread(os.path.join(tmpdir, file_name))
        for file_name in expected_files
    ]
    for image in images:
        assert isinstance(image, np.ndarray)
def test_plot_income_data():
    '''
    Call plot_income_data without an output directory and check that
    the value it returns is truthy.
    '''
    # Inputs: 80 age midpoints (20.5, ..., 99.5), the 7 ability-group
    # midpoints and population shares, and a uniform age distribution
    age_midpoints = np.linspace(20 + 0.5, 100 - 0.5, 80)
    ability_midpoints = np.array(
        [0.125, 0.375, 0.6, 0.75, 0.85, 0.945, 0.995])
    ability_shares = np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09, 0.01])
    uniform_age_wgts = np.ones(80) / 80
    e_matrix = income.get_e_orig(uniform_age_wgts, ability_shares)

    returned_fig = parameter_plots.plot_income_data(
        age_midpoints, ability_midpoints, ability_shares, e_matrix)

    assert returned_fig
def get_e_interp(S, age_wgts, age_wgts_80, abil_wgts, plot=False):
    '''
    This function takes a source matrix of lifetime earnings profiles
    (abilities, emat) of size (80, 7), where 80 is the number of ages
    and 7 is the number of ability types in the source matrix, and
    interpolates new values of a new S x J sized matrix of abilities
    using linear interpolation. [NOTE: For this application, cubic
    spline interpolation introduces too much curvature.]

    This function also includes the two cases in which J = 9 and J = 10
    that include higher lifetime earning percentiles calibrated using
    Piketty and Saez (2003). In those two cases (and in the case of the
    original 7 ability weights), S must equal 80 and no interpolation
    is performed.

    Args:
        S (int): number of ages to interpolate. This method assumes that
            ages are evenly spaced between the beginning of the 21st
            year and the end of the 100th year, 3 <= S <= 80
        age_wgts (Numpy array): distribution of population in each age
            for the interpolated ages, length S
        age_wgts_80 (Numpy array): percent of population in each
            one-year age from 21 to 100, length 80
        abil_wgts (Numpy array): distribution of population in each
            ability group, length J
        plot (bool): if True, creates plots of emat_orig and the new
            interpolated emat_new

    Returns:
        emat_new_scaled (Numpy array): interpolated ability matrix scaled
            so that population-weighted average is 1, size SxJ

    Raises:
        RuntimeError: if S > 80, or if the ability midpoints implied by
            abil_wgts fall outside the interpolation bounds
            [0.125, 0.995]

    '''
    # With S > 80 the first and last interpolated age midpoints lie
    # outside the source age grid [20.5, 99.5]. Linear griddata returns
    # NaN for such points, which would make the normalizing sum, and so
    # the whole returned matrix, NaN. Fail fast instead.
    if S > 80:
        err = ("S cannot exceed 80: the interpolated ages would fall " +
               "outside the 80 one-year ages of the source ability " +
               "matrix.")
        raise RuntimeError(err)

    # Get original 80 x 7 ability matrix
    abil_wgts_orig = np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09, 0.01])
    emat_orig = get_e_orig(age_wgts_80, abil_wgts_orig, plot)

    # Special cases that split the original top 1% group into finer
    # groups. Each entry pairs the full ability weights vector with the
    # factors applied to the original top 1% profile to obtain the
    # profiles of the new top groups (columns 6 onward).
    # Factors use Piketty and Saez estimates
    # (https://eml.berkeley.edu/~saez/pikettyqje.pdf)
    # updated for 2018 to create scaling factor
    # assumption is that profile shape of these top groups are
    # same as the top 1% estimated in tax data, just scaled up by
    # ratio determined from P&S 2018 estimates (Table 0, ex cap gains)
    top_group_cases = (
        # J = 10: top 1% split into groups of weight 0.5%, 0.4%, 0.09%
        # and 0.01%
        (np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09,
                   0.005, 0.004, 0.0009, 0.0001]),
         np.array([0.458759521, 0.847252448, 2.713698465,
                   18.74863983])),
        # J = 9: top 1% split into groups of weight 0.5%, 0.4% and 0.1%
        (np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09,
                   0.005, 0.004, 0.001]),
         np.array([0.458759521, 0.847252448, 4.317192601])),
    )

    if S == 80:
        abil_wgts_flat = np.squeeze(abil_wgts)

        # Original weights on the original age grid: nothing to do
        if np.array_equal(abil_wgts_flat, abil_wgts_orig):
            return emat_orig

        for case_wgts, top_scales in top_group_cases:
            if np.array_equal(abil_wgts_flat, case_wgts):
                J = case_wgts.shape[0]
                emat_new = np.zeros((S, J))
                # Bottom six groups are unchanged
                emat_new[:, :6] = emat_orig[:, :6]
                # Top groups are scaled copies of the original top 1%
                emat_new[:, 6:] = emat_orig[:, -1:] * top_scales
                return emat_new / (
                    emat_new * age_wgts.reshape(S, 1) *
                    abil_wgts.reshape(1, J)).sum()

    # General case: interpolate onto the new S x J grid
    # generate abil_midp vector (midpoint percentile of each group)
    J = abil_wgts.shape[0]
    abil_midp = np.zeros(J)
    pct_lb = 0.0
    for j in range(J):
        abil_midp[j] = pct_lb + 0.5 * abil_wgts[j]
        pct_lb += abil_wgts[j]

    # Make sure that values in abil_midp are within interpolating
    # bounds set by the hard coded abil_wgts_orig
    if abil_midp.min() < 0.125 or abil_midp.max() > 0.995:
        err = ("One or more entries in abils vector is outside the " +
               "allowable bounds.")
        raise RuntimeError(err)

    # Source grid: midpoints of the 7 ability groups and 80 ages
    emat_j_midp = np.array(
        [0.125, 0.375, 0.600, 0.750, 0.850, 0.945, 0.995])
    emat_s_midp = np.linspace(20.5, 99.5, 80)
    emat_j_mesh, emat_s_mesh = np.meshgrid(emat_j_midp, emat_s_midp)

    # Target grid: midpoints of S evenly spaced age bins on [20, 100]
    newstep = 80 / S
    new_s_midp = np.linspace(20 + 0.5 * newstep, 100 - 0.5 * newstep, S)
    new_j_mesh, new_s_mesh = np.meshgrid(abil_midp, new_s_midp)

    newcoords = np.hstack((emat_s_mesh.reshape((80 * 7, 1)),
                           emat_j_mesh.reshape((80 * 7, 1))))
    emat_new = si.griddata(newcoords, emat_orig.flatten(),
                           (new_s_mesh, new_j_mesh), method='linear')

    # Rescale so that the population-weighted average equals 1
    emat_new_scaled = emat_new / (
        emat_new * age_wgts.reshape(S, 1) *
        abil_wgts.reshape(1, J)).sum()

    if plot:
        kwargs = {'filesuffix': '_intrp_scaled'}
        pp.plot_income_data(new_s_midp, abil_midp, abil_wgts,
                            emat_new_scaled, OUTPUT_DIR, **kwargs)

    return emat_new_scaled
def get_e_orig(age_wgts, abil_wgts, plot=False):
    r'''
    Build the 80 x 7 matrix of lifetime earnings ability profiles.
    Rows correspond to annual ages from 21 to 100 and columns to paths
    based on income percentiles 0-25, 25-50, 50-70, 70-80, 80-90, 90-99,
    99-100. The ergodic population distribution is an input in order to
    rescale the paths so that the weighted average equals 1.

    The data come from the following file:

        `data/ability/FR_wage_profile_tables.xlsx`

    The polynomials are of the form

    .. math::
        \ln(abil) = \alpha + \beta_{1}\text{age} + \beta_{2}\text{age}^2
        + \beta_{3}\text{age}^3

    Values come from regression analysis using IRS CWHS with hours
    imputed from the CPS.

    Args:
        age_wgts (Numpy array): ergodic age distribution, length 80
        abil_wgts (Numpy array): population weights in each lifetime
            earnings group, length 7
        plot (bool): if True, generates 3D plots of ability paths

    Returns:
        e_orig_scaled (Numpy array): lifetime ability profiles scaled
            so that population-weighted average is 1, size 80 x 7

    Raises:
        RuntimeError: if age_wgts does not have 80 elements or abil_wgts
            does not have 7 elements

    '''
    # Raise an error if age_wgts is not a vector of size (80,)
    if age_wgts.shape[0] != 80:
        raise RuntimeError("Vector age_wgts does not have 80 elements.")
    # Raise an error if abil_wgts is not a vector of size (7,)
    if abil_wgts.shape[0] != 7:
        raise RuntimeError("Vector abil_wgts does not have 7 elements.")

    # 1) Evaluate the cubic log-ability polynomials to get the income
    # profiles for ages 21 to 80. One coefficient per ability group.
    beta_1 = np.array([
        -0.09720122, 0.05995294, 0.17654618, 0.21168263, 0.21638731,
        0.04500235, 0.09229392
    ])
    beta_2 = np.array([
        0.00247639, -0.00004086, -0.00240656, -0.00306555, -0.00321041,
        0.00094253, 0.00012902
    ])
    beta_3 = np.array([
        -0.00001842, -0.00000521, 0.00001039, 0.00001438, 0.00001579,
        -0.00001470, -0.00001169
    ])
    alpha = np.array([
        3.41e+00, 0.69689692, -0.78761958, -1.11e+00, -0.93939272,
        1.60e+00, 1.89e+00
    ])
    # Column vector of ages; broadcasting against the length-7
    # coefficient vectors yields the 60 x 7 matrix of paths
    ages_21_80 = np.linspace(21, 80, 60).reshape((60, 1))
    log_paths = (alpha + (beta_1 * ages_21_80) +
                 (beta_2 * (ages_21_80 ** 2)) +
                 (beta_3 * (ages_21_80 ** 3)))

    # Rows for ages 81 to 100 start at zero and are filled in below
    e_orig = np.zeros((80, 7))
    e_orig[:60, :] = np.exp(log_paths)

    # 2) Forecast (with some art) the path of the final 20 years of
    # ability types. This following variable is what percentage of
    # ability at age 80 ability falls to at age 100. In general, we
    # wanted people to lose half of their ability over a 20-year
    # period. The first entry is 0.47, though, because nothing higher
    # would converge. The second-to-last is 0.7 because this group
    # actually has a slightly higher ability at age 80 than the last
    # group, so this value makes it decrease more so it ends up being
    # monotonic.
    abil_deprec = np.array([0.47, 0.5, 0.5, 0.5, 0.5, 0.7, 0.5])
    # Initial guesses for the arctan. They're pretty sensitive.
    init_guesses = np.array([[58, 0.0756438545595, -5.6940142786],
                             [27, 0.069, -5],
                             [35, .06, -5],
                             [37, 0.339936555352, -33.5987329144],
                             [70.5229181668, 0.0701993896947,
                              -6.37746859905],
                             [35, .06, -5],
                             [35, .06, -5]])
    group_inputs = zip(beta_1, beta_2, beta_3, abil_deprec, init_guesses)
    for j, (b1, b2, b3, deprec, guess) in enumerate(group_inputs):
        # Extend group j beyond age 80, starting from its age-80 value
        e_orig[60:, j] = arctan_fit(e_orig[59, j], b1, b2, b3, deprec,
                                    guess)

    # 3) Rescale the lifetime earnings path matrix so that the
    # population weighted average equals 1.
    weighted_paths = (e_orig * age_wgts.reshape(80, 1) *
                      abil_wgts.reshape(1, 7))
    e_orig_scaled = e_orig / weighted_paths.sum()

    if plot:
        ages_long = np.linspace(21, 100, 80)
        abil_midp = np.array([12.5, 37.5, 60.0, 75.0, 85.0, 94.5, 99.5])
        # Plot the original unscaled 80 x 7 ability matrix first, then
        # the scaled one
        matrices_to_plot = (
            ('_orig_unscaled', e_orig),
            ('_orig_scaled', e_orig_scaled),
        )
        for suffix, emat in matrices_to_plot:
            pp.plot_income_data(ages_long, abil_midp, abil_wgts, emat,
                                OUTPUT_DIR, filesuffix=suffix)

    return e_orig_scaled
def get_e_interp(S, age_wgts, age_wgts_80, abil_wgts, plot=False):
    '''
    This function takes a source matrix of lifetime earnings profiles
    (abilities, emat) of size (80, 7), where 80 is the number of ages
    and 7 is the number of ability types in the source matrix, and
    interpolates new values of a new S x J sized matrix of abilities
    using linear interpolation. [NOTE: For this application, cubic
    spline interpolation introduces too much curvature.]

    Args:
        S (int): number of ages to interpolate. This method assumes that
            ages are evenly spaced between the beginning of the 21st
            year and the end of the 100th year, 3 <= S <= 80
        age_wgts (Numpy array): distribution of population in each age
            for the interpolated ages, length S
        age_wgts_80 (Numpy array): percent of population in each
            one-year age from 21 to 100, length 80
        abil_wgts (Numpy array): distribution of population in each
            ability group, length J
        plot (bool): if True, creates plots of emat_orig and the new
            interpolated emat_new

    Returns:
        emat_new_scaled (Numpy array): interpolated ability matrix scaled
            so that population-weighted average is 1, size SxJ

    Raises:
        RuntimeError: if S > 80, or if the ability midpoints implied by
            abil_wgts fall outside the interpolation bounds
            [0.125, 0.995]

    '''
    # With S > 80 the first and last interpolated age midpoints lie
    # outside the source age grid [20.5, 99.5]. Linear griddata returns
    # NaN for such points, which would make the normalizing sum, and so
    # the whole returned matrix, NaN. Fail fast instead.
    if S > 80:
        err = ("S cannot exceed 80: the interpolated ages would fall " +
               "outside the 80 one-year ages of the source ability " +
               "matrix.")
        raise RuntimeError(err)

    # Get original 80 x 7 ability matrix
    abil_wgts_orig = np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09, 0.01])
    emat_orig = get_e_orig(age_wgts_80, abil_wgts_orig, plot)

    # Return emat_orig if S = 80 and abil_wgts = abil_wgts_orig
    if S == 80 and np.array_equal(abil_wgts, abil_wgts_orig):
        emat_new_scaled = emat_orig
    else:
        # generate abil_midp vector (midpoint percentile of each group)
        J = abil_wgts.shape[0]
        abil_midp = np.zeros(J)
        pct_lb = 0.0
        for j in range(J):
            abil_midp[j] = pct_lb + 0.5 * abil_wgts[j]
            pct_lb += abil_wgts[j]

        # Make sure that values in abil_midp are within interpolating
        # bounds set by the hard coded abil_wgts_orig
        if abil_midp.min() < 0.125 or abil_midp.max() > 0.995:
            err = ("One or more entries in abils vector is outside the " +
                   "allowable bounds.")
            raise RuntimeError(err)

        # Source grid: midpoints of the 7 ability groups and 80 ages
        emat_j_midp = np.array([0.125, 0.375, 0.600, 0.750, 0.850,
                                0.945, 0.995])
        emat_s_midp = np.linspace(20.5, 99.5, 80)
        emat_j_mesh, emat_s_mesh = np.meshgrid(emat_j_midp, emat_s_midp)

        # Target grid: midpoints of S evenly spaced age bins on [20, 100]
        newstep = 80 / S
        new_s_midp = np.linspace(
            20 + 0.5 * newstep, 100 - 0.5 * newstep, S)
        new_j_mesh, new_s_mesh = np.meshgrid(abil_midp, new_s_midp)

        newcoords = np.hstack((emat_s_mesh.reshape((80*7, 1)),
                               emat_j_mesh.reshape((80*7, 1))))
        emat_new = si.griddata(newcoords, emat_orig.flatten(),
                               (new_s_mesh, new_j_mesh), method='linear')

        # Rescale so that the population-weighted average equals 1
        emat_new_scaled = emat_new / (
            emat_new * age_wgts.reshape(S, 1) *
            abil_wgts.reshape(1, J)).sum()

        if plot:
            kwargs = {'filesuffix': '_intrp_scaled'}
            pp.plot_income_data(
                new_s_midp, abil_midp, abil_wgts, emat_new_scaled,
                OUTPUT_DIR, **kwargs)

    return emat_new_scaled