Example #1
def test_plot_income_data_save_fig(tmpdir):
    ages = np.linspace(20 + 0.5, 100 - 0.5, 80)
    abil_midp = np.array([0.125, 0.375, 0.6, 0.75, 0.85, 0.945, 0.995])
    abil_pcts = np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09, 0.01])
    age_wgts = np.ones(80) * 1 / 80
    emat = income.get_e_orig(age_wgts, abil_pcts)
    parameter_plots.plot_income_data(
        ages, abil_midp, abil_pcts, emat, output_dir=tmpdir)
    img1 = mpimg.imread(os.path.join(tmpdir, 'ability_3D_lev.png'))
    img2 = mpimg.imread(os.path.join(tmpdir, 'ability_3D_log.png'))
    img3 = mpimg.imread(os.path.join(tmpdir, 'ability_2D_log.png'))

    assert isinstance(img1, np.ndarray)
    assert isinstance(img2, np.ndarray)
    assert isinstance(img3, np.ndarray)
Example #2
def test_plot_income_data():
    ages = np.linspace(20 + 0.5, 100 - 0.5, 80)
    abil_midp = np.array([0.125, 0.375, 0.6, 0.75, 0.85, 0.945, 0.995])
    abil_pcts = np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09, 0.01])
    age_wgts = np.ones(80) * 1 / 80
    emat = income.get_e_orig(age_wgts, abil_pcts)
    fig = parameter_plots.plot_income_data(
        ages, abil_midp, abil_pcts, emat)

    assert fig
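
The two tests above exercise both calling modes of plot_income_data: returning a Matplotlib figure and writing PNG files to a directory. A minimal sketch outside of pytest, assuming the same income and parameter_plots modules used in the tests, looks like the following; the ./images output path is only illustrative.

import os
import numpy as np
import income
import parameter_plots

ages = np.linspace(20.5, 99.5, 80)
abil_midp = np.array([0.125, 0.375, 0.6, 0.75, 0.85, 0.945, 0.995])
abil_pcts = np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09, 0.01])
age_wgts = np.ones(80) / 80
emat = income.get_e_orig(age_wgts, abil_pcts)

# Return the figure object directly
fig = parameter_plots.plot_income_data(ages, abil_midp, abil_pcts, emat)

# Or write ability_3D_lev.png, ability_3D_log.png, and ability_2D_log.png
# to a directory by passing output_dir (hypothetical local path)
out_dir = os.path.join('.', 'images')
os.makedirs(out_dir, exist_ok=True)
parameter_plots.plot_income_data(
    ages, abil_midp, abil_pcts, emat, output_dir=out_dir)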
Example #3
def get_e_interp(S, age_wgts, age_wgts_80, abil_wgts, plot=False):
    '''
    This function takes a source matrix of lifetime earnings profiles
    (abilities, emat) of size (80, 7), where 80 is the number of ages
    and 7 is the number of ability types in the source matrix, and
    interpolates new values of a new S x J sized matrix of abilities
    using linear interpolation. [NOTE: For this application, cubic
    spline interpolation introduces too much curvature.]

    This function also handles the two cases J = 9 and J = 10, which
    include higher lifetime earnings percentiles calibrated using
    Piketty and Saez (2003).

    Args:
        S (int): number of ages to interpolate to, must be >= 3. This
            method assumes that ages are evenly spaced between the
            beginning of the 21st year and the end of the 100th year
        age_wgts (Numpy array): distribution of population in each age
            for the interpolated ages, length S
        age_wgts_80 (Numpy array): percent of population in each
            one-year age from 21 to 100, length 80
        abil_wgts (Numpy array): distribution of population in each
            ability group, length J
        plot (bool): if True, creates plots of emat_orig and the new
            interpolated emat_new

    Returns:
        emat_new_scaled (Numpy array): interpolated ability matrix scaled
            so that population-weighted average is 1, size SxJ

    '''
    # Get original 80 x 7 ability matrix
    abil_wgts_orig = np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09, 0.01])
    emat_orig = get_e_orig(age_wgts_80, abil_wgts_orig, plot)
    if (S == 80 and np.array_equal(
            np.squeeze(abil_wgts),
            np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09, 0.01]))):
        emat_new_scaled = emat_orig
    elif (S == 80 and np.array_equal(
            np.squeeze(abil_wgts),
            np.array([
                0.25, 0.25, 0.2, 0.1, 0.1, 0.09, 0.005, 0.004, 0.0009, 0.0001
            ]))):
        emat_new = np.zeros((S, len(abil_wgts)))
        emat_new[:, :7] = emat_orig
        # Create profiles for the 99-99.5, 99.5-99.9, 99.9-99.99, and
        # 99.99-100 percentile groups using Piketty and Saez estimates
        # (https://eml.berkeley.edu/~saez/pikettyqje.pdf)
        # updated for 2018 to create scaling factors.
        # The assumption is that the profile shape of each of these top
        # groups is the same as the top 1% estimated in the tax data,
        # just scaled up by a ratio determined from the P&S 2018
        # estimates (Table 0, ex cap gains).
        emat_new[:, 6] = emat_orig[:, -1] * 0.458759521
        emat_new[:, 7] = emat_orig[:, -1] * 0.847252448
        emat_new[:, 8] = emat_orig[:, -1] * 2.713698465
        emat_new[:, 9] = emat_orig[:, -1] * 18.74863983
        emat_new_scaled = emat_new / (emat_new * age_wgts.reshape(80, 1) *
                                      abil_wgts.reshape(1, 10)).sum()
    elif (S == 80 and np.array_equal(
            np.squeeze(abil_wgts),
            np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09, 0.005, 0.004, 0.001]))):
        emat_new = np.zeros((S, len(abil_wgts)))
        emat_new[:, :7] = emat_orig
        # Create profiles for the 99-99.5, 99.5-99.9, and 99.9-100
        # percentile groups using Piketty and Saez estimates
        # (https://eml.berkeley.edu/~saez/pikettyqje.pdf)
        # updated for 2018 to create scaling factors.
        # The assumption is that the profile shape of each of these top
        # three groups is the same as the top 1% estimated in the tax
        # data, just scaled up by a ratio determined from the P&S 2018
        # estimates (Table 0, ex cap gains).
        emat_new[:, 6] = emat_orig[:, -1] * 0.458759521
        emat_new[:, 7] = emat_orig[:, -1] * 0.847252448
        emat_new[:, 8] = emat_orig[:, -1] * 4.317192601
        emat_new_scaled = emat_new / (emat_new * age_wgts.reshape(80, 1) *
                                      abil_wgts.reshape(1, 9)).sum()
    else:
        # generate abil_midp vector
        J = abil_wgts.shape[0]
        abil_midp = np.zeros(J)
        pct_lb = 0.0
        for j in range(J):
            abil_midp[j] = pct_lb + 0.5 * abil_wgts[j]
            pct_lb += abil_wgts[j]

        # Make sure that values in abil_midp are within interpolating
        # bounds set by the hard coded abil_wgts_orig
        if abil_midp.min() < 0.125 or abil_midp.max() > 0.995:
            err = ("One or more entries in abils vector is outside the " +
                   "allowable bounds.")
            raise RuntimeError(err)

        emat_j_midp = np.array(
            [0.125, 0.375, 0.600, 0.750, 0.850, 0.945, 0.995])
        emat_s_midp = np.linspace(20.5, 99.5, 80)
        emat_j_mesh, emat_s_mesh = np.meshgrid(emat_j_midp, emat_s_midp)
        newstep = 80 / S
        new_s_midp = np.linspace(20 + 0.5 * newstep, 100 - 0.5 * newstep, S)
        new_j_mesh, new_s_mesh = np.meshgrid(abil_midp, new_s_midp)
        newcoords = np.hstack((emat_s_mesh.reshape(
            (80 * 7, 1)), emat_j_mesh.reshape((80 * 7, 1))))
        emat_new = si.griddata(newcoords,
                               emat_orig.flatten(), (new_s_mesh, new_j_mesh),
                               method='linear')
        emat_new_scaled = emat_new / (emat_new * age_wgts.reshape(S, 1) *
                                      abil_wgts.reshape(1, J)).sum()

        if plot:
            kwargs = {'filesuffix': '_intrp_scaled'}
            pp.plot_income_data(new_s_midp, abil_midp, abil_wgts,
                                emat_new_scaled, OUTPUT_DIR, **kwargs)

    return emat_new_scaled
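
A minimal usage sketch of get_e_interp, assuming it is importable from the same income module used in the tests above; choosing S = 40 with the default seven ability weights exercises the linear-interpolation branch:

import numpy as np
import income

S = 40
age_wgts = np.ones(S) / S            # uniform interpolated age distribution
age_wgts_80 = np.ones(80) / 80       # uniform one-year age weights, ages 21-100
abil_wgts = np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09, 0.01])  # J = 7

emat = income.get_e_interp(S, age_wgts, age_wgts_80, abil_wgts)
print(emat.shape)  # (40, 7)
# The rescaling makes the population-weighted average ability equal 1
print((emat * age_wgts.reshape(S, 1) * abil_wgts.reshape(1, 7)).sum())  # ~1.0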
Example #4
def get_e_orig(age_wgts, abil_wgts, plot=False):
    r'''
    This function generates the 80 x 7 matrix of lifetime earnings
    ability profiles, corresponding to annual ages from 21 to 100 and to
    paths based on income percentiles 0-25, 25-50, 50-70, 70-80, 80-90,
    90-99, 99-100. The ergodic population distribution is an input in
    order to rescale the paths so that the weighted average equals 1.

    The data come from the following file:

        `data/ability/FR_wage_profile_tables.xlsx`

    The polynomials are of the form

    .. math::
        \ln(abil) = \alpha + \beta_{1}\text{age} + \beta_{2}\text{age}^2
            + \beta_{3}\text{age}^3

    Values come from regression analysis using IRS CWHS with hours
    imputed from the CPS.

    Args:
        age_wgts (Numpy array): ergodic age distribution, length S
        abil_wgts (Numpy array): population weights in each lifetime
            earnings group, length J
        plot (bool): if True, generates 3D plots of ability paths

    Returns:
        e_orig_scaled (Numpy array): lifetime ability profiles scaled
            so that the population-weighted average is 1, size SxJ

    '''
    # Raise an error if age_wgts is not a vector of size (80,)
    if age_wgts.shape[0] != 80:
        err = "Vector age_wgts does not have 80 elements."
        raise RuntimeError(err)
    # Raise an error if abil_wgts is not a vector of size (7,)
    if abil_wgts.shape[0] != 7:
        err = "Vector abil_wgts does not have 7 elements."
        raise RuntimeError(err)

    # 1) Generate polynomials and use them to get income profiles for
    #    ages 21 to 80.
    one = np.array([
        -0.09720122, 0.05995294, 0.17654618, 0.21168263, 0.21638731,
        0.04500235, 0.09229392
    ])
    two = np.array([
        0.00247639, -0.00004086, -0.00240656, -0.00306555, -0.00321041,
        0.00094253, 0.00012902
    ])
    three = np.array([
        -0.00001842, -0.00000521, 0.00001039, 0.00001438, 0.00001579,
        -0.00001470, -0.00001169
    ])
    const = np.array([
        3.41e+00, 0.69689692, -0.78761958, -1.11e+00, -0.93939272, 1.60e+00,
        1.89e+00
    ])
    ages_short = np.tile(np.linspace(21, 80, 60).reshape((60, 1)), (1, 7))
    log_abil_paths = (const + (one * ages_short) + (two * (ages_short**2)) +
                      (three * (ages_short**3)))
    abil_paths = np.exp(log_abil_paths)
    e_orig = np.zeros((80, 7))
    e_orig[:60, :] = abil_paths
    e_orig[60:, :] = 0.0

    # 2) Forecast (with some art) the path of the final 20 years of
    #    ability types. The following variable gives the fraction of
    #    age-80 ability that remains at age 100. In general, we wanted
    #    people to lose half of their ability over the 20-year period.
    #    The first entry is 0.47, though, because nothing higher would
    #    converge. The second-to-last is 0.7 because this group actually
    #    has a slightly higher ability at age 80 than the last group, so
    #    this value makes it decrease more so that it ends up being
    #    monotonic.
    abil_deprec = np.array([0.47, 0.5, 0.5, 0.5, 0.5, 0.7, 0.5])
    # Initial guesses for the arctan fit. They're pretty sensitive.
    init_guesses = np.array([[58, 0.0756438545595, -5.6940142786],
                             [27, 0.069, -5], [35, .06, -5],
                             [37, 0.339936555352, -33.5987329144],
                             [70.5229181668, 0.0701993896947, -6.37746859905],
                             [35, .06, -5], [35, .06, -5]])
    for j in range(7):
        e_orig[60:, j] = arctan_fit(e_orig[59, j], one[j], two[j], three[j],
                                    abil_deprec[j], init_guesses[j])

    # 3) Rescale the lifetime earnings path matrix so that the
    #    population weighted average equals 1.
    e_orig_scaled = e_orig / (e_orig * age_wgts.reshape(80, 1) *
                              abil_wgts.reshape(1, 7)).sum()

    if plot:
        ages_long = np.linspace(21, 100, 80)
        abil_midp = np.array([12.5, 37.5, 60.0, 75.0, 85.0, 94.5, 99.5])
        # Plot original unscaled 80 x 7 ability matrix
        kwargs = {'filesuffix': '_orig_unscaled'}
        pp.plot_income_data(ages_long, abil_midp, abil_wgts, e_orig,
                            OUTPUT_DIR, **kwargs)

        # Plot original scaled 80 x 7 ability matrix
        kwargs = {'filesuffix': '_orig_scaled'}
        pp.plot_income_data(ages_long, abil_midp, abil_wgts, e_orig_scaled,
                            OUTPUT_DIR, **kwargs)

    return e_orig_scaled
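
A minimal sketch of calling get_e_orig, assuming it (and its arctan_fit helper) is importable from the income module as in the tests above; it checks the scaling property the docstring describes:

import numpy as np
import income

age_wgts = np.ones(80) / 80          # uniform ergodic age distribution
abil_wgts = np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09, 0.01])

e_orig = income.get_e_orig(age_wgts, abil_wgts)
print(e_orig.shape)  # (80, 7)
# The population-weighted average of the scaled profiles is 1
print((e_orig * age_wgts.reshape(80, 1) * abil_wgts.reshape(1, 7)).sum())  # ~1.0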
Example #5
def get_e_interp(S, age_wgts, age_wgts_80, abil_wgts, plot=False):
    '''
    This function takes a source matrix of lifetime earnings profiles
    (abilities, emat) of size (80, 7), where 80 is the number of ages
    and 7 is the number of ability types in the source matrix, and
    interpolates new values of a new S x J sized matrix of abilities
    using linear interpolation. [NOTE: For this application, cubic
    spline interpolation introduces too much curvature.]

    Args:
        S (int): number of ages to interpolate to, must be >= 3. This
            method assumes that ages are evenly spaced between the
            beginning of the 21st year and the end of the 100th year
        age_wgts (Numpy array): distribution of population in each age
            for the interpolated ages, length S
        age_wgts_80 (Numpy array): percent of population in each
            one-year age from 21 to 100, length 80
        abil_wgts (Numpy array): distribution of population in each
            ability group, length J
        plot (bool): if True, creates plots of emat_orig and the new
            interpolated emat_new

    Returns:
        emat_new_scaled (Numpy array): interpolated ability matrix scaled
            so that population-weighted average is 1, size SxJ

    '''
    # Get original 80 x 7 ability matrix
    abil_wgts_orig = np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09, 0.01])
    emat_orig = get_e_orig(age_wgts_80, abil_wgts_orig, plot)

    # Return emat_orig if S = 80 and abil_wgts = abil_wgts_orig
    if S == 80 and np.array_equal(
            abil_wgts,
            np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09, 0.01])):
        emat_new_scaled = emat_orig
    else:
        # generate abil_midp vector
        J = abil_wgts.shape[0]
        abil_midp = np.zeros(J)
        pct_lb = 0.0
        for j in range(J):
            abil_midp[j] = pct_lb + 0.5 * abil_wgts[j]
            pct_lb += abil_wgts[j]

        # Make sure that values in abil_midp are within interpolating
        # bounds set by the hard coded abil_wgts_orig
        if abil_midp.min() < 0.125 or abil_midp.max() > 0.995:
            err = ("One or more entries in abils vector is outside the "
                   + "allowable bounds.")
            raise RuntimeError(err)

        emat_j_midp = np.array([0.125, 0.375, 0.600, 0.750, 0.850,
                                0.945, 0.995])
        emat_s_midp = np.linspace(20.5, 99.5, 80)
        emat_j_mesh, emat_s_mesh = np.meshgrid(emat_j_midp, emat_s_midp)
        newstep = 80 / S
        new_s_midp = np.linspace(
            20 + 0.5 * newstep, 100 - 0.5 * newstep, S)
        new_j_mesh, new_s_mesh = np.meshgrid(abil_midp, new_s_midp)
        newcoords = np.hstack((emat_s_mesh.reshape((80*7, 1)),
                               emat_j_mesh.reshape((80*7, 1))))
        emat_new = si.griddata(newcoords, emat_orig.flatten(),
                               (new_s_mesh, new_j_mesh), method='linear')
        emat_new_scaled = emat_new / (emat_new * age_wgts.reshape(S, 1)
                                      * abil_wgts.reshape(1, J)).sum()

        if plot:
            kwargs = {'filesuffix': '_intrp_scaled'}
            pp.plot_income_data(
                new_s_midp, abil_midp, abil_wgts, emat_new_scaled,
                OUTPUT_DIR, **kwargs)

    return emat_new_scaled