def test_dropq_with_full_puf(puf_path):
    """
    Check dropq.run_model results against a full-sample puf.csv run.

    The reform-year combined liability reported by dropq must be within
    one percent of the value in the full-sample diagnostic table, and the
    delta reported in fiscal_tots must equal reform minus baseline.
    """
    # reform is specified in code and wrapped in a usermods dictionary
    fyr = 2016
    nyrs = 2
    reforms = {
        '_II_rt4': [0.39, 0.40, 0.41],
        '_PT_rt4': [0.39, 0.40, 0.41],
        '_II_rt3': [0.31, 0.32, 0.33],
        '_PT_rt3': [0.31, 0.32, 0.33],
    }
    usermods = {
        'policy': {fyr: reforms},
        'consumption': {},
        'behavior': {},
        'growdiff_baseline': {},
        'growdiff_response': {},
        'gdp_elasticity': {},
    }

    # full-sample calculation: reform policy applied to all puf.csv records
    pol = Policy()
    pol.implement_reform(usermods['policy'])
    calc = Calculator(policy=pol, records=Records(data=puf_path))
    for _ in range(3):
        calc.increment_year()
    adt = multiyear_diagnostic_table(calc, nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    assert taxes_fullsample is not None

    # dropq calculation on the same data read in as a DataFrame
    tax_data = pd.read_csv(puf_path)
    (mY_dec, _, _, _, _, _, _, _, _, _,
     fiscal_tots) = dropq.run_model(tax_data,
                                    start_year=fyr,
                                    user_mods=usermods,
                                    return_json=False,
                                    num_years=nyrs)

    # dropq sums are in dollars, so scale them to billions before comparing
    fulls_reform_revenue = taxes_fullsample.loc[fyr]
    dropq_reform_revenue = mY_dec['_combined_dec_0'].loc['sums'] * 1e-9
    abs_gap = abs(fulls_reform_revenue - dropq_reform_revenue)
    assert abs_gap / fulls_reform_revenue < 0.01

    # fiscal_tots positions 0, 1, 2 are used as delta, baseline, reform
    reported_delta = fiscal_tots[0].loc['combined_tax'].values
    baseline_tax = fiscal_tots[1].loc['combined_tax']
    reform_tax = fiscal_tots[2].loc['combined_tax']
    npt.assert_allclose((reform_tax - baseline_tax).values, reported_delta)
def test_agg(tests_path):
    """
    Compare the current-law cps.csv aggregate diagnostic table with the
    expected results stored in cpscsv_agg_expect.txt.

    On a mismatch the actual table is written next to the expected file
    and a ValueError listing the differing lines is raised.
    """
    # pylint: disable=too-many-locals
    nyrs = 10
    # current-law Policy, cps.csv Records, and a Calculator using both
    calc = Calculator(policy=Policy(), records=Records.cps_constructor())
    # aggregate diagnostic table rendered as text ending with one EOL
    adt = multiyear_diagnostic_table(calc, nyrs)
    actual_results = adt.to_string() + '\n'
    # expected text with any trailing whitespace reduced to one EOL
    aggres_path = os.path.join(tests_path, 'cpscsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        expected_results = expected_file.read().rstrip('\n\t ') + '\n'
    act = actual_results.splitlines(True)
    exp = expected_results.splitlines(True)
    # tolerance passed to line_diff_list:
    # tighter test for Python 2.7, looser test for Python 3.x
    epsilon = 1e-6
    small = epsilon if sys.version_info.major == 2 else 0.1 + epsilon
    assert len(act) == len(exp)
    diff_lines = []
    for actline, expline in zip(act, exp):
        if actline != expline:
            diffs = line_diff_list(actline, expline, small)
            if len(diffs) > 0:
                diff_lines.extend(diffs)
    if not diff_lines:
        return
    # save the new results so they can replace the expected file if OK
    new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
    with open(new_filename, 'w') as new_file:
        new_file.write(actual_results)
    rule = '-------------------------------------------------\n'
    msg = (
        'CPSCSV AGG RESULTS DIFFER\n'
        + rule
        + '--- NEW RESULTS IN cpscsv_agg_actual.txt FILE ---\n'
        + '--- if new OK, copy cpscsv_agg_actual.txt to ---\n'
        + '--- cpscsv_agg_expect.txt ---\n'
        + '--- and rerun test. ---\n'
        + rule
    )
    msg += ''.join(diff_lines)
    msg += rule
    raise ValueError(msg)
def test_full_dropq_puf(puf_path):
    """
    Check dropq.run_models results against a full-sample puf.csv run.

    The reform-year combined liability reported by dropq must be within
    two percent of the value in the full-sample diagnostic table, and the
    delta reported in fiscal_tots must equal reform minus baseline to
    three decimal places.
    """
    myvars = {
        '_II_rt4': [0.39, 0.40, 0.41],
        '_PT_rt4': [0.39, 0.40, 0.41],
        '_II_rt3': [0.31, 0.32, 0.33],
        '_PT_rt3': [0.31, 0.32, 0.33],
    }
    first = 2016
    user_mods = {first: myvars}
    nyrs = 2

    # create a Policy object (clp) and apply the reform to it
    clp = Policy()
    clp.implement_reform(user_mods)
    # create a Records object (rec) containing all puf.csv input records
    rec = Records(data=puf_path)
    # create a Calculator object using clp policy and puf records
    calc = Calculator(policy=clp, records=rec)
    for _ in range(3):
        calc.increment_year()
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = multiyear_diagnostic_table(calc, nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    assert taxes_fullsample is not None

    # read the same data into a DataFrame for dropq; only the first and
    # last of the eleven returned items are needed by this test, and the
    # number of years is kept in step with the full-sample table
    tax_data = pd.read_csv(puf_path)
    (mY_dec, _, _, _, _, _, _, _, _, _,
     fiscal_tots) = dropq.run_models(tax_data,
                                     start_year=first,
                                     user_mods=user_mods,
                                     return_json=False,
                                     num_years=nyrs)

    pure_reform_revenue = taxes_fullsample.loc[first]
    dropq_reform_revenue = mY_dec['_combined_dec_0'].loc['sums']
    dropq_reform_revenue /= 1e9  # convert dollars to billions of dollars
    diff = abs(pure_reform_revenue - dropq_reform_revenue)
    # assert that dropq revenue is similar to the "pure" calculation
    assert diff / dropq_reform_revenue < 0.02

    # assert that Reform - Baseline = Reported Delta
    delta_yr0 = fiscal_tots[0]
    baseline_yr0 = fiscal_tots[1]
    reform_yr0 = fiscal_tots[2]
    diff_yr0 = (reform_yr0.loc['combined_tax'] -
                baseline_yr0.loc['combined_tax']).values
    delta_yr0 = delta_yr0.loc['combined_tax'].values
    npt.assert_array_almost_equal(diff_yr0, delta_yr0, decimal=3)
def test_with_pufcsv(puf_fullsample):
    """
    Check run_nth_year_tax_calc_model against a full-sample calculation.

    A payroll tax rate reform is analyzed for 2026; the combined tax
    total returned by run_nth_year_tax_calc_model must be within
    one-hundredth of one percent of the full-sample diagnostic table value.
    """
    # reform timing and the usermods dictionary, all specified in code
    start_year = 2017
    reform_year = start_year
    analysis_year = 2026
    year_n = analysis_year - start_year
    usermods = {
        'policy': {reform_year: {'_FICA_ss_trt': [0.2]}},
        'consumption': {},
        'behavior': {},
        'growdiff_baseline': {},
        'growdiff_response': {},
        'gdp_elasticity': {},
    }
    assert random_seed(usermods) == 1574318062

    # full-sample calculation: reform Policy (pol) applied to all records
    pol = Policy()
    pol.implement_reform(usermods['policy'])
    calc = Calculator(policy=pol, records=Records(data=puf_fullsample))
    while calc.current_year < analysis_year:
        calc.increment_year()
    # one-year aggregate diagnostic table for the analysis year
    adt = multiyear_diagnostic_table(calc, 1)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    assert taxes_fullsample is not None
    fulls_reform_revenue = float(taxes_fullsample.loc[analysis_year])

    # run_nth_year_tax_calc_model is given the same input data
    resdict = run_nth_year_tax_calc_model(year_n, start_year,
                                          puf_fullsample, usermods,
                                          return_json=True)
    # scale the returned total by 1e-9 to match the table's $b units
    dropq_reform_revenue = float(resdict['aggr_2']['combined_tax_9']) * 1e-9

    # the two revenue numbers must agree very closely
    diff = abs(fulls_reform_revenue - dropq_reform_revenue)
    proportional_diff = diff / fulls_reform_revenue
    report = 'f,d,adiff,pdiff= {:.4f} {:.4f} {:.4f} {}'.format(
        fulls_reform_revenue, dropq_reform_revenue, diff, proportional_diff)
    print(report)
    assert proportional_diff < 0.0001  # one-hundredth of one percent
def test_full_dropq_puf(puf_path):
    """
    Check dropq.run_models results against a full-sample puf.csv run.

    The reform-year combined liability reported by dropq must be within
    one percent of the value in the full-sample diagnostic table, and the
    delta reported in fiscal_tots must equal reform minus baseline.
    """
    first = 2016
    nyrs = 2
    myvars = {
        '_II_rt4': [0.39, 0.40, 0.41],
        '_PT_rt4': [0.39, 0.40, 0.41],
        '_II_rt3': [0.31, 0.32, 0.33],
        '_PT_rt3': [0.31, 0.32, 0.33],
    }
    user_mods = {first: myvars}

    # Full-sample calculation: reform policy applied to all puf.csv records
    pol = Policy()
    pol.implement_reform(user_mods)
    calc = Calculator(policy=pol, records=Records(data=puf_path))
    for _ in range(3):
        calc.increment_year()
    adt = multiyear_diagnostic_table(calc, nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    assert taxes_fullsample is not None

    # dropq calculation on the same data read in as a DataFrame
    tax_data = pd.read_csv(puf_path)
    (mY_dec, _, _, _, _, _, _, _, _, _,
     fiscal_tots) = dropq.run_models(tax_data,
                                     start_year=first,
                                     user_mods=user_mods,
                                     return_json=False,
                                     num_years=nyrs)

    # dropq sums are in dollars, so scale them to billions before comparing
    pure_reform_revenue = taxes_fullsample.loc[first]
    dropq_reform_revenue = mY_dec['_combined_dec_0'].loc['sums'] * 1e-9
    abs_gap = abs(pure_reform_revenue - dropq_reform_revenue)
    assert abs_gap / pure_reform_revenue < 0.01

    # fiscal_tots positions 0, 1, 2 are used as delta, baseline, reform
    reported_delta = fiscal_tots[0].loc['combined_tax'].values
    baseline_tax = fiscal_tots[1].loc['combined_tax']
    reform_tax = fiscal_tots[2].loc['combined_tax']
    npt.assert_allclose((reform_tax - baseline_tax).values, reported_delta)
def test_agg(tests_path, puf_path):  # pylint: disable=redefined-outer-name
    """
    Check current-law aggregate taxes computed from puf.csv.

    First the full-sample diagnostic table is compared with the text in
    pufcsv_agg_expect.txt; then the combined liability from a fixed-seed
    sub-sample is compared with the full-sample values for each year.
    A ValueError is raised if either comparison fails.
    """
    # pylint: disable=too-many-locals,too-many-statements
    nyrs = 10
    # current-law Policy, all puf.csv Records, and a Calculator using both
    calc = Calculator(policy=Policy(), records=Records(data=puf_path))
    calc_start_year = calc.current_year
    # aggregate diagnostic table (adt) and its text form ending in one EOL
    adt = multiyear_diagnostic_table(calc, nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    adtstr = adt.to_string() + '\n'
    # expected text with any trailing whitespace reduced to one EOL
    aggres_path = os.path.join(tests_path, 'pufcsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        expected_results = expected_file.read().rstrip('\n\t ') + '\n'
    # any line produced by the unified diff means the results differ
    diff_lines = list(difflib.unified_diff(expected_results.splitlines(True),
                                           adtstr.splitlines(True),
                                           fromfile='expected',
                                           tofile='actual', n=0))
    if diff_lines:
        # save the new results so they can replace the expected file if OK
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(adtstr)
        rule = '-------------------------------------------------\n'
        raise ValueError(
            'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n'
            + rule
            + '--- NEW RESULTS IN pufcsv_agg_actual.txt FILE ---\n'
            + '--- if new OK, copy pufcsv_agg_actual.txt to ---\n'
            + '--- pufcsv_agg_expect.txt ---\n'
            + '--- and rerun test. ---\n'
            + rule
        )

    # repeat the calculation on a sub-sample drawn with a fixed seed
    subfrac = 0.02  # sub-sample fraction
    rn_seed = 80  # to ensure sub-sample is always the same
    fullsample = pd.read_csv(puf_path)
    subsample = fullsample.sample(frac=subfrac,  # pylint: disable=no-member
                                  random_state=rn_seed)
    rec_subsample = Records(data=subsample)
    calc_subsample = Calculator(policy=Policy(), records=rec_subsample)
    adt_subsample = multiyear_diagnostic_table(calc_subsample,
                                               num_years=nyrs)
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]

    # compare combined tax liability from full and sub samples
    reltol = 0.04  # maximum allowed relative difference in tax liability
    if np.allclose(taxes_subsample, taxes_fullsample,
                   atol=0.0, rtol=reltol):
        return
    # report every year whose relative difference exceeds reltol
    msg = 'PUFCSV AGG RESULTS DIFFER IN SUB-SAMPLE AND FULL-SAMPLE\n'
    msg += 'WHEN subfrac = {:.3f} and reltol = {:.4f}\n'.format(subfrac,
                                                                reltol)
    row = ' year,sub,full,reldif= {}\t{:.2f}\t{:.2f}\t{:.4f}\n'
    paired = zip(taxes_subsample, taxes_fullsample)
    for offset, (sub_val, all_val) in enumerate(paired):
        tax_sub = float(sub_val)
        tax_all = float(all_val)
        reldiff = abs(tax_sub - tax_all) / abs(tax_all)
        if reldiff > reltol:
            msg += row.format(offset + calc_start_year,
                              tax_sub, tax_all, reldiff)
    raise ValueError(msg)
def test_agg(tests_path, puf_path):
    """
    Check current-law aggregate taxes computed from puf.csv.

    First the full-sample diagnostic table is compared with the text in
    pufcsv_agg_expect.txt; then the combined liability from a fixed-seed
    two-percent sub-sample is compared with the full-sample values for
    each year.  A ValueError is raised if either comparison fails.
    """
    # pylint: disable=too-many-locals,too-many-statements
    # for fixture args, pylint: disable=redefined-outer-name
    nyrs = 10
    # current-law Policy, all puf.csv Records, and a Calculator using both
    calc = Calculator(policy=Policy(), records=Records(data=puf_path))
    calc_start_year = calc.current_year
    # aggregate diagnostic table (adt) and its text form ending in one EOL
    adt = multiyear_diagnostic_table(calc, nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    adtstr = adt.to_string() + '\n'
    # expected text with any trailing whitespace reduced to one EOL
    aggres_path = os.path.join(tests_path, 'pufcsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        expected_results = expected_file.read().rstrip('\n\t ') + '\n'
    # any line produced by the unified diff means the results differ
    diff_lines = list(difflib.unified_diff(expected_results.splitlines(True),
                                           adtstr.splitlines(True),
                                           fromfile='expected',
                                           tofile='actual', n=0))
    if diff_lines:
        # save the new results so they can replace the expected file if OK
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(adtstr)
        rule = '-------------------------------------------------\n'
        raise ValueError(
            'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n'
            + rule
            + '--- NEW RESULTS IN pufcsv_agg_actual.txt FILE ---\n'
            + '--- if new OK, copy pufcsv_agg_actual.txt to ---\n'
            + '--- pufcsv_agg_expect.txt ---\n'
            + '--- and rerun test. ---\n'
            + rule
        )

    # repeat the calculation on a sub-sample drawn with a fixed seed
    rn_seed = 80  # to ensure two-percent sub-sample is always the same
    fullsample = pd.read_csv(puf_path)
    subsample = fullsample.sample(frac=0.02,  # pylint: disable=no-member
                                  random_state=rn_seed)
    rec_subsample = Records(data=subsample)
    calc_subsample = Calculator(policy=Policy(), records=rec_subsample)
    adt_subsample = multiyear_diagnostic_table(calc_subsample,
                                               num_years=nyrs)
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]

    # compare combined tax liability from full and sub samples
    reltol = 0.01  # maximum allowed relative difference in tax liability
    if np.allclose(taxes_subsample, taxes_fullsample,
                   atol=0.0, rtol=reltol):
        return
    # report every year whose relative difference exceeds reltol
    msg = 'PUFCSV AGG RESULTS DIFFER IN SUB-SAMPLE AND FULL-SAMPLE\n'
    msg += 'WHEN reltol = {:.4f}\n'.format(reltol)
    row = ' year,sub,full,reldif= {}\t{:.2f}\t{:.2f}\t{:.4f}\n'
    paired = zip(taxes_subsample, taxes_fullsample)
    for offset, (sub_val, all_val) in enumerate(paired):
        tax_sub = float(sub_val)
        tax_all = float(all_val)
        reldiff = abs(tax_sub - tax_all) / abs(tax_all)
        if reldiff > reltol:
            msg += row.format(offset + calc_start_year,
                              tax_sub, tax_all, reldiff)
    raise ValueError(msg)