Пример #1
0
def test_dropq_with_full_puf(puf_path):
    """
    Compare dropq.run_model reform results with a full-sample calculation.

    A reform of the _II_rt3, _II_rt4, _PT_rt3 and _PT_rt4 parameters is
    applied starting in 2016.  The "Combined Liability ($b)" row of the
    multi-year diagnostic table built from all puf.csv records must be
    within one percent of the dropq '_combined_dec_0' sums value, and the
    reform-minus-baseline combined_tax must match the reported delta.
    """
    first_year = 2016
    num_years = 2
    # assemble the usermods dictionary expected by dropq.run_model
    rate_changes = {
        '_II_rt4': [0.39, 0.40, 0.41],
        '_PT_rt4': [0.39, 0.40, 0.41],
        '_II_rt3': [0.31, 0.32, 0.33],
        '_PT_rt3': [0.31, 0.32, 0.33],
    }
    usermods = {
        'policy': {first_year: rate_changes},
        'consumption': {},
        'behavior': {},
        'growdiff_baseline': {},
        'growdiff_response': {},
        'gdp_elasticity': {},
    }
    # full-sample calculation: reform policy applied to every puf.csv record
    policy = Policy()
    policy.implement_reform(usermods['policy'])
    records = Records(data=puf_path)
    full_calc = Calculator(policy=policy, records=records)
    # advance the calculator three years
    # (presumably to reach first_year; confirm against Records start year)
    for _ in range(3):
        full_calc.increment_year()
    diag_table = multiyear_diagnostic_table(full_calc, num_years)
    combined_liability = diag_table.loc["Combined Liability ($b)"]
    assert combined_liability is not None
    # dropq calculation using the same input data read as a DataFrame
    puf_frame = pd.read_csv(puf_path)
    (mY_dec, _, _, _, _, _, _, _, _, _, fiscal_tots) = dropq.run_model(
        puf_frame,
        start_year=first_year,
        user_mods=usermods,
        return_json=False,
        num_years=num_years)
    full_revenue = combined_liability.loc[first_year]
    dropq_revenue = mY_dec['_combined_dec_0'].loc['sums']
    dropq_revenue *= 1e-9  # dollars --> billions of dollars
    abs_gap = abs(full_revenue - dropq_revenue)
    # dropq revenue must be within one percent of the full-sample revenue
    assert abs_gap / full_revenue < 0.01
    # reform minus baseline must equal the reported delta
    reported = fiscal_tots[0]
    baseline = fiscal_tots[1]
    reform = fiscal_tots[2]
    computed_delta = (reform.loc['combined_tax'] -
                      baseline.loc['combined_tax']).values
    reported_delta = reported.loc['combined_tax'].values
    npt.assert_allclose(computed_delta, reported_delta)
Пример #2
0
def test_agg(tests_path):
    """
    Check current-law aggregate taxes computed from the cps.csv file.

    The ten-year diagnostic table is rendered as text and compared line by
    line with cpscsv_agg_expect.txt in tests_path.  When differences are
    found, the actual results are written to cpscsv_agg_actual.txt and a
    ValueError describing the differences is raised.
    """
    # pylint: disable=too-many-locals
    nyrs = 10
    # current-law policy applied to all cps.csv input records
    calc = Calculator(policy=Policy(), records=Records.cps_constructor())
    # aggregate diagnostic table (a Pandas DataFrame) rendered as text
    # with a trailing EOL character
    actual_results = multiyear_diagnostic_table(calc, nyrs).to_string() + '\n'
    act = actual_results.splitlines(True)
    # expected results from file, with trailing whitespace normalized
    aggres_path = os.path.join(tests_path, 'cpscsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        expected_results = expected_file.read().rstrip('\n\t ') + '\n'
    exp = expected_results.splitlines(True)
    # tolerance is tighter under Python 2.7 and looser under Python 3.x
    epsilon = 1e-6
    small = epsilon if sys.version_info.major == 2 else 0.1 + epsilon
    assert len(act) == len(exp)
    # gather differences reported by line_diff_list for non-identical lines
    diff_lines = []
    for actline, expline in zip(act, exp):
        if actline != expline:
            diff_lines.extend(line_diff_list(actline, expline, small))
    if not diff_lines:
        return
    # test failure: save actual results and report the differing lines
    new_filename = aggres_path[:-10] + 'actual.txt'
    with open(new_filename, 'w') as new_file:
        new_file.write(actual_results)
    header = [
        'CPSCSV AGG RESULTS DIFFER\n',
        '-------------------------------------------------\n',
        '--- NEW RESULTS IN cpscsv_agg_actual.txt FILE ---\n',
        '--- if new OK, copy cpscsv_agg_actual.txt to  ---\n',
        '---                 cpscsv_agg_expect.txt     ---\n',
        '---            and rerun test.                ---\n',
        '-------------------------------------------------\n',
    ]
    footer = ['-------------------------------------------------\n']
    raise ValueError(''.join(header + diff_lines + footer))
Пример #3
0
def test_full_dropq_puf(puf_path):
    """
    Compare dropq.run_models reform results with a "pure" calculation.

    A reform of the _II_rt3, _II_rt4, _PT_rt3 and _PT_rt4 parameters is
    applied starting in 2016.  The "Combined Liability ($b)" row of the
    multi-year diagnostic table built from all puf.csv records must be
    within two percent of the dropq '_combined_dec_0' sums value, and the
    reform-minus-baseline combined_tax must match the reported delta to
    three decimal places.
    """
    myvars = {}
    myvars['_II_rt4'] = [0.39, 0.40, 0.41]
    myvars['_PT_rt4'] = [0.39, 0.40, 0.41]
    myvars['_II_rt3'] = [0.31, 0.32, 0.33]
    myvars['_PT_rt3'] = [0.31, 0.32, 0.33]
    first = 2016
    user_mods = {first: myvars}

    nyrs = 2
    # create a Policy object (clp) and apply the reform to it
    clp = Policy()
    clp.implement_reform(user_mods)
    # create a Records object (rec) containing all puf.csv input records
    rec = Records(data=puf_path)
    # create a Calculator object using clp policy and puf records
    calc = Calculator(policy=clp, records=rec)
    # advance the calculator three years
    # (presumably to reach `first`; confirm against Records start year)
    for _ in range(3):
        calc.increment_year()
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = multiyear_diagnostic_table(calc, nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]

    assert taxes_fullsample is not None

    # read the Public Use File into a Pandas DataFrame
    tax_data = pd.read_csv(puf_path)

    # only the first and last of the eleven returned items are used here
    (mY_dec, _, _, _, _, _, _, _,
     _, _, fiscal_tots) = dropq.run_models(tax_data,
                                           start_year=first,
                                           user_mods=user_mods,
                                           return_json=False,
                                           num_years=nyrs)

    pure_reform_revenue = taxes_fullsample.loc[first]
    dropq_reform_revenue = mY_dec['_combined_dec_0'].loc['sums']
    dropq_reform_revenue /= 1e9  # convert to billions of dollars
    diff = abs(pure_reform_revenue - dropq_reform_revenue)
    # assert that dropq revenue is similar to the "pure" calculation
    assert diff / dropq_reform_revenue < 0.02

    # assert that Reform - Baseline = Reported Delta
    delta_yr0 = fiscal_tots[0]
    baseline_yr0 = fiscal_tots[1]
    reform_yr0 = fiscal_tots[2]
    diff_yr0 = (reform_yr0.loc['combined_tax'] -
                baseline_yr0.loc['combined_tax']).values
    delta_yr0 = delta_yr0.loc['combined_tax'].values
    npt.assert_array_almost_equal(diff_yr0, delta_yr0, decimal=3)
Пример #4
0
def test_with_pufcsv(puf_fullsample):
    """
    Compare run_nth_year_tax_calc_model results with a full-sample run.

    A reform setting _FICA_ss_trt to 0.2 in 2017 is analyzed for 2026.
    The "Combined Liability ($b)" value from a Calculator run on
    puf_fullsample must differ from the 'combined_tax_9' entry of the
    'aggr_2' result by less than one-hundredth of one percent.
    """
    start_year = 2017
    reform_year = start_year
    analysis_year = 2026
    year_n = analysis_year - start_year
    # assemble the usermods dictionary in code
    usermods = {
        'policy': {reform_year: {'_FICA_ss_trt': [0.2]}},
        'consumption': {},
        'behavior': {},
        'growdiff_baseline': {},
        'growdiff_response': {},
        'gdp_elasticity': {},
    }
    # the seed derived from usermods is expected to be this exact value
    assert random_seed(usermods) == 1574318062
    # full-sample calculation: reform policy applied to puf_fullsample
    reform_policy = Policy()
    reform_policy.implement_reform(usermods['policy'])
    records = Records(data=puf_fullsample)
    calc = Calculator(policy=reform_policy, records=records)
    while calc.current_year < analysis_year:
        calc.increment_year()
    # one-year aggregate diagnostic table as a Pandas DataFrame object
    diag_table = multiyear_diagnostic_table(calc, 1)
    combined_liability = diag_table.loc["Combined Liability ($b)"]
    assert combined_liability is not None
    full_revenue = float(combined_liability.loc[analysis_year])
    # same input data passed to run_nth_year_tax_calc_model
    resdict = run_nth_year_tax_calc_model(year_n, start_year,
                                          puf_fullsample, usermods,
                                          return_json=True)
    aggregates = resdict['aggr_2']
    # convert dollars to billions of dollars
    dropq_revenue = float(aggregates['combined_tax_9']) * 1e-9
    # absolute and proportional differences between the two calculations
    abs_gap = abs(full_revenue - dropq_revenue)
    rel_gap = abs_gap / full_revenue
    report = 'f,d,adiff,pdiff=  {:.4f}  {:.4f}  {:.4f}  {}'.format(
        full_revenue, dropq_revenue, abs_gap, rel_gap)
    print(report)
    assert rel_gap < 0.0001  # one-hundredth of one percent
Пример #5
0
def test_full_dropq_puf(puf_path):
    """
    Compare dropq.run_models reform results with a "pure" calculation.

    A reform of the _II_rt3, _II_rt4, _PT_rt3 and _PT_rt4 parameters is
    applied starting in 2016.  The "Combined Liability ($b)" row of the
    multi-year diagnostic table built from all puf.csv records must be
    within one percent of the dropq '_combined_dec_0' sums value, and the
    reform-minus-baseline combined_tax must match the reported delta.
    """
    first = 2016
    nyrs = 2
    user_mods = {
        first: {
            '_II_rt4': [0.39, 0.40, 0.41],
            '_PT_rt4': [0.39, 0.40, 0.41],
            '_II_rt3': [0.31, 0.32, 0.33],
            '_PT_rt3': [0.31, 0.32, 0.33],
        }
    }
    # "pure" calculation: reform policy applied to every puf.csv record
    policy = Policy()
    policy.implement_reform(user_mods)
    records = Records(data=puf_path)
    pure_calc = Calculator(policy=policy, records=records)
    # advance the calculator three years
    # (presumably to reach `first`; confirm against Records start year)
    for _ in range(3):
        pure_calc.increment_year()
    diag_table = multiyear_diagnostic_table(pure_calc, nyrs)
    combined_liability = diag_table.loc["Combined Liability ($b)"]
    assert combined_liability is not None
    # dropq calculation using the same input data read as a DataFrame
    puf_frame = pd.read_csv(puf_path)
    (mY_dec, _, _, _, _, _, _, _, _, _, fiscal_tots) = dropq.run_models(
        puf_frame,
        start_year=first,
        user_mods=user_mods,
        return_json=False,
        num_years=nyrs)
    pure_revenue = combined_liability.loc[first]
    dropq_revenue = mY_dec['_combined_dec_0'].loc['sums']
    dropq_revenue *= 1e-9  # dollars --> billions of dollars
    abs_gap = abs(pure_revenue - dropq_revenue)
    # dropq revenue must be within one percent of the "pure" revenue
    assert abs_gap / pure_revenue < 0.01
    # reform minus baseline must equal the reported delta
    reported = fiscal_tots[0]
    baseline = fiscal_tots[1]
    reform = fiscal_tots[2]
    computed_delta = (reform.loc['combined_tax'] -
                      baseline.loc['combined_tax']).values
    reported_delta = reported.loc['combined_tax'].values
    npt.assert_allclose(computed_delta, reported_delta)
Пример #6
0
def test_agg(tests_path, puf_path):  # pylint: disable=redefined-outer-name
    """
    Check aggregate taxes under current-law policy (no reform) using the
    full-sample puf.csv and a two-percent sub-sample of puf.csv.

    The full-sample diagnostic table text must match
    pufcsv_agg_expect.txt exactly; otherwise the actual results are
    written to pufcsv_agg_actual.txt and ValueError is raised.  The
    sub-sample combined liability must then be within a relative
    tolerance of the full-sample value in every year, else ValueError.
    """
    # pylint: disable=too-many-locals,too-many-statements
    nyrs = 10
    # current-law policy applied to all puf.csv input records
    calc = Calculator(policy=Policy(), records=Records(data=puf_path))
    # remember the starting year before the diagnostic table is built
    calc_start_year = calc.current_year
    adt = multiyear_diagnostic_table(calc, nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # render table as text with a trailing EOL character
    adtstr = adt.to_string() + '\n'
    actual = adtstr.splitlines(True)
    # read expected results, normalizing trailing whitespace
    aggres_path = os.path.join(tests_path, 'pufcsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        expected_results = expected_file.read().rstrip('\n\t ') + '\n'
    expected = expected_results.splitlines(True)
    # any line emitted by unified_diff means actual and expected differ
    diff_lines = list(difflib.unified_diff(expected, actual,
                                           fromfile='expected',
                                           tofile='actual',
                                           n=0))
    if diff_lines:
        new_filename = aggres_path[:-10] + 'actual.txt'
        with open(new_filename, 'w') as new_file:
            new_file.write(adtstr)
        raise ValueError(
            'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n'
            '-------------------------------------------------\n'
            '--- NEW RESULTS IN pufcsv_agg_actual.txt FILE ---\n'
            '--- if new OK, copy pufcsv_agg_actual.txt to  ---\n'
            '---                 pufcsv_agg_expect.txt     ---\n'
            '---            and rerun test.                ---\n'
            '-------------------------------------------------\n'
        )
    # build the diagnostic table again from a sub-sample of the records
    rn_seed = 80  # fixed seed so the sub-sample is always the same
    subfrac = 0.02  # sub-sample fraction
    fullsample = pd.read_csv(puf_path)
    subsample = fullsample.sample(
        frac=subfrac,  # pylint: disable=no-member
        random_state=rn_seed)
    calc_subsample = Calculator(policy=Policy(),
                                records=Records(data=subsample))
    adt_subsample = multiyear_diagnostic_table(calc_subsample, num_years=nyrs)
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    # compare combined tax liability from full and sub samples by year
    reltol = 0.04  # maximum allowed relative difference in tax liability
    if np.allclose(taxes_subsample, taxes_fullsample,
                   atol=0.0, rtol=reltol):
        return
    # test failure: list each year whose relative difference is too large
    msg = 'PUFCSV AGG RESULTS DIFFER IN SUB-SAMPLE AND FULL-SAMPLE\n'
    msg += 'WHEN subfrac = {:.3f} and reltol = {:.4f}\n'.format(
        subfrac, reltol)
    row_format = ' year,sub,full,reldif= {}\t{:.2f}\t{:.2f}\t{:.4f}\n'
    it_sub = np.nditer(taxes_subsample, flags=['f_index'])
    it_all = np.nditer(taxes_fullsample, flags=['f_index'])
    while not it_sub.finished:
        cyr = it_sub.index + calc_start_year
        tax_sub = float(it_sub[0])
        tax_all = float(it_all[0])
        reldiff = abs(tax_sub - tax_all) / abs(tax_all)
        if reldiff > reltol:
            msg += row_format.format(cyr, tax_sub, tax_all, reldiff)
        it_sub.iternext()
        it_all.iternext()
    raise ValueError(msg)
Пример #7
0
def test_agg(tests_path, puf_path):
    """
    Check aggregate taxes under current-law policy (no reform) using the
    full-sample puf.csv and a two-percent sub-sample of puf.csv.

    The full-sample diagnostic table text must match
    pufcsv_agg_expect.txt exactly; otherwise the actual results are
    written to pufcsv_agg_actual.txt and ValueError is raised.  The
    sub-sample combined liability must then be within a relative
    tolerance of the full-sample value in every year, else ValueError.
    """
    # pylint: disable=too-many-locals,too-many-statements
    # for fixture args, pylint: disable=redefined-outer-name
    nyrs = 10
    # current-law policy applied to all puf.csv input records
    calc = Calculator(policy=Policy(), records=Records(data=puf_path))
    # remember the starting year before the diagnostic table is built
    calc_start_year = calc.current_year
    adt = multiyear_diagnostic_table(calc, nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # render table as text with a trailing EOL character
    adtstr = adt.to_string() + '\n'
    actual = adtstr.splitlines(True)
    # read expected results, normalizing trailing whitespace
    aggres_path = os.path.join(tests_path, 'pufcsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        expected_results = expected_file.read().rstrip('\n\t ') + '\n'
    expected = expected_results.splitlines(True)
    # any line emitted by unified_diff means actual and expected differ
    diff_lines = list(difflib.unified_diff(expected, actual,
                                           fromfile='expected',
                                           tofile='actual',
                                           n=0))
    if diff_lines:
        new_filename = aggres_path[:-10] + 'actual.txt'
        with open(new_filename, 'w') as new_file:
            new_file.write(adtstr)
        raise ValueError(
            'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n'
            '-------------------------------------------------\n'
            '--- NEW RESULTS IN pufcsv_agg_actual.txt FILE ---\n'
            '--- if new OK, copy pufcsv_agg_actual.txt to  ---\n'
            '---                 pufcsv_agg_expect.txt     ---\n'
            '---            and rerun test.                ---\n'
            '-------------------------------------------------\n'
        )
    # build the diagnostic table again from a sub-sample of the records
    rn_seed = 80  # fixed seed so the two-percent sub-sample never changes
    fullsample = pd.read_csv(puf_path)
    subsample = fullsample.sample(frac=0.02,  # pylint: disable=no-member
                                  random_state=rn_seed)
    calc_subsample = Calculator(policy=Policy(),
                                records=Records(data=subsample))
    adt_subsample = multiyear_diagnostic_table(calc_subsample, num_years=nyrs)
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    # compare combined tax liability from full and sub samples by year
    reltol = 0.01  # maximum allowed relative difference in tax liability
    if np.allclose(taxes_subsample, taxes_fullsample,
                   atol=0.0, rtol=reltol):
        return
    # test failure: list each year whose relative difference is too large
    msg = 'PUFCSV AGG RESULTS DIFFER IN SUB-SAMPLE AND FULL-SAMPLE\n'
    msg += 'WHEN reltol = {:.4f}\n'.format(reltol)
    row_format = ' year,sub,full,reldif= {}\t{:.2f}\t{:.2f}\t{:.4f}\n'
    it_sub = np.nditer(taxes_subsample, flags=['f_index'])
    it_all = np.nditer(taxes_fullsample, flags=['f_index'])
    while not it_sub.finished:
        cyr = it_sub.index + calc_start_year
        tax_sub = float(it_sub[0])
        tax_all = float(it_all[0])
        reldiff = abs(tax_sub - tax_all) / abs(tax_all)
        if reldiff > reltol:
            msg += row_format.format(cyr, tax_sub, tax_all, reldiff)
        it_sub.iternext()
        it_all.iternext()
    raise ValueError(msg)