def differences(new_filename, old_filename, stat_kind, small=0.0):
    """
    Compare the text of new_filename with the text of old_filename.

    Both files are read in full and split into lines (keeping line endings)
    before being passed, along with small, to nonsmall_diffs.

    Returns an empty string, after deleting new_filename, when nonsmall_diffs
    reports no differences; otherwise returns a multi-line message that is
    labelled with stat_kind and names both files, leaving new_filename on
    disk so that it can be inspected or copied over old_filename.
    """
    def read_lines(path):
        # splitlines(True) keeps the EOL character(s) on every line
        with open(path, 'r') as infile:
            return infile.read().splitlines(True)

    new_lines = read_lines(new_filename)
    old_lines = read_lines(old_filename)
    if not nonsmall_diffs(new_lines, old_lines, small):
        # nothing to report, so the new results file is not needed
        os.remove(new_filename)
        return ''
    new_name = os.path.basename(new_filename)
    old_name = os.path.basename(old_filename)
    rule = ('-------------------------------------------------'
            '-------------\n')
    parts = [
        '{} RESULTS DIFFER:\n'.format(stat_kind),
        rule,
        '--- NEW RESULTS IN {} FILE ---\n'.format(new_name),
        '--- if new OK, copy {} to ---\n'.format(new_name),
        '--- {} ---\n'.format(old_name),
        '--- and rerun test. ',
        ' ---\n',
        rule,
    ]
    return ''.join(parts)
def differences(new_filename, old_filename, stat_kind, small=0.0):
    """
    Compare the text of new_filename with the text of old_filename.

    Both files are read in full and split into lines (keeping line endings)
    before being passed, along with small, to nonsmall_diffs.

    Returns an empty string, after deleting new_filename, when nonsmall_diffs
    reports no differences; otherwise returns a multi-line message that is
    labelled with stat_kind and names both files, leaving new_filename on
    disk so that it can be inspected or copied over old_filename.
    """
    def read_lines(path):
        # splitlines(True) keeps the EOL character(s) on every line
        with open(path, 'r') as infile:
            return infile.read().splitlines(True)

    new_lines = read_lines(new_filename)
    old_lines = read_lines(old_filename)
    if not nonsmall_diffs(new_lines, old_lines, small):
        # nothing to report, so the new results file is not needed
        os.remove(new_filename)
        return ''
    new_name = os.path.basename(new_filename)
    old_name = os.path.basename(old_filename)
    rule = ('-------------------------------------------------'
            '-------------\n')
    parts = [
        '{} RESULTS DIFFER:\n'.format(stat_kind),
        rule,
        '--- NEW RESULTS IN {} FILE ---\n'.format(new_name),
        '--- if new OK, copy {} to ---\n'.format(new_name),
        '--- {} ---\n'.format(old_name),
        '--- and rerun test. ',
        ' ---\n',
        rule,
    ]
    return ''.join(parts)
def test_run_tax_calc_model(tests_path):
    """
    Check tbi.run_nth_year_tax_calc_model results when using CPS data.

    The returned dictionary is rendered table by table as JSON text and
    compared with the contents of tbi_cps_expect.txt in tests_path.  When
    nonsmall_diffs reports differences, the actual text is written to
    tbi_cps_actual.txt in tests_path and a ValueError is raised.
    """
    policy_mods = {
        '_II_rt3': [0.33],
        '_PT_rt3': [0.33],
        '_II_rt4': [0.33],
        '_PT_rt4': [0.33],
    }
    user_modifications = {
        'policy': {2016: policy_mods},
        'consumption': {2016: {'_MPC_e20400': [0.01]}},
        'behavior': {2016: {'_BE_sub': [0.25]}},
        'growdiff_baseline': {},
        'growdiff_response': {},
    }
    res = run_nth_year_tax_calc_model(year_n=2, start_year=2018,
                                      use_puf_not_cps=False,
                                      use_full_sample=False,
                                      user_mods=user_modifications,
                                      return_dict=True)
    assert isinstance(res, dict)
    # render every table, in sorted key order, as one multiline string
    pieces = []
    for tbl in sorted(res):
        pieces.append('TABLE {} RESULTS:\n'.format(tbl))
        pieces.append(json.dumps(res[tbl], sort_keys=True, indent=4,
                                 separators=(',', ': ')) + '\n')
    actual_results = ''.join(pieces)
    # load the expected results text
    expect_fname = 'tbi_cps_expect.txt'
    with open(os.path.join(tests_path, expect_fname), 'r') as expect_file:
        expect_results = expect_file.read()
    # nothing more to do when actual and expect results agree
    if not nonsmall_diffs(actual_results.splitlines(True),
                          expect_results.splitlines(True)):
        return
    # swap the trailing 'expect.txt' of the file name for 'actual.txt'
    actual_fname = expect_fname[:-len('expect.txt')] + 'actual.txt'
    with open(os.path.join(tests_path, actual_fname), 'w') as actual_file:
        actual_file.write(actual_results)
    template = (
        'TBI RESULTS DIFFER\n'
        '----------------------------------------------\n'
        '--- NEW RESULTS IN {} FILE ---\n'
        '--- if new OK, copy {} to ---\n'
        '--- {} ---\n'
        '--- and rerun test. ---\n'
        '----------------------------------------------\n'
    )
    raise ValueError(template.format(actual_fname, actual_fname,
                                     expect_fname))
def res_and_out_are_same(base):
    """
    Compare the contents of the base.res and base.out files.

    Returns True when nonsmall_diffs reports no differences between the
    lines of the two files and False otherwise.
    """
    texts = []
    # read the actual (.res) file first and then the expected (.out) file
    for extension in ('.res', '.out'):
        with open(base + extension) as infile:
            texts.append(infile.read())
    act_lines, exp_lines = [text.splitlines(True) for text in texts]
    diffs = nonsmall_diffs(act_lines, exp_lines)
    return not diffs
def res_and_out_are_same(base):
    """
    Compare the contents of the base.res and base.out files.

    Returns True when nonsmall_diffs reports no differences between the
    lines of the two files and False otherwise.
    """
    texts = []
    # read the actual (.res) file first and then the expected (.out) file
    for extension in ('.res', '.out'):
        with open(base + extension) as infile:
            texts.append(infile.read())
    act_lines, exp_lines = [text.splitlines(True) for text in texts]
    diffs = nonsmall_diffs(act_lines, exp_lines)
    return not diffs
def test_run_taxcalc_model(tests_path):
    """
    Check tbi.run_nth_year_taxcalc_model results when using PUF data.

    The returned dictionary is rendered table by table as JSON text and
    compared with the contents of tbi_puf_expect.txt in tests_path.  When
    nonsmall_diffs reports differences, the actual text is written to
    tbi_puf_actual.txt in tests_path and a ValueError is raised.
    """
    policy_mods = {
        '_II_rt3': [0.33],
        '_PT_rt3': [0.33],
        '_II_rt4': [0.33],
        '_PT_rt4': [0.33],
    }
    user_modifications = {
        'policy': {2016: policy_mods},
        'consumption': {2016: {'_MPC_e20400': [0.01]}},
        'growdiff_baseline': {},
        'growdiff_response': {},
    }
    res = run_nth_year_taxcalc_model(year_n=2, start_year=2018,
                                     use_puf_not_cps=True,
                                     use_full_sample=False,
                                     user_mods=user_modifications,
                                     return_dict=True)
    assert isinstance(res, dict)
    # render every table, in sorted key order, as one multiline string
    pieces = []
    for tbl in sorted(res):
        pieces.append('TABLE {} RESULTS:\n'.format(tbl))
        pieces.append(json.dumps(res[tbl], sort_keys=True, indent=4,
                                 separators=(',', ': ')) + '\n')
    actual_results = ''.join(pieces)
    # load the expected results text
    expect_fname = 'tbi_puf_expect.txt'
    with open(os.path.join(tests_path, expect_fname), 'r') as expect_file:
        expect_results = expect_file.read()
    # nothing more to do when actual and expect results agree
    if not nonsmall_diffs(actual_results.splitlines(True),
                          expect_results.splitlines(True)):
        return
    # swap the trailing 'expect.txt' of the file name for 'actual.txt'
    actual_fname = expect_fname[:-len('expect.txt')] + 'actual.txt'
    with open(os.path.join(tests_path, actual_fname), 'w') as actual_file:
        actual_file.write(actual_results)
    template = (
        'TBI RESULTS DIFFER\n'
        '----------------------------------------------\n'
        '--- NEW RESULTS IN {} FILE ---\n'
        '--- if new OK, copy {} to ---\n'
        '--- {} ---\n'
        '--- and rerun test. ---\n'
        '----------------------------------------------\n'
    )
    raise ValueError(template.format(actual_fname, actual_fname,
                                     expect_fname))
def res_and_out_are_same(base):
    """
    Compare the contents of the base.res and base.out files.

    Returns True when nonsmall_diffs, called with small=1e-6, reports no
    differences between the lines of the two files and False otherwise.
    """
    def read_lines(extension):
        # splitlines(True) keeps the EOL character(s) on every line
        with open(base + extension) as infile:
            return infile.read().splitlines(True)

    # the expected (.out) file is read before the actual (.res) file
    expected_lines = read_lines('.out')
    actual_lines = read_lines('.res')
    if nonsmall_diffs(actual_lines, expected_lines, small=1e-6):
        return False
    return True
def test_run_tax_calc_model(using_puf, tests_path):
    """
    Check run_nth_year_tax_calc_model results using either PUF or CPS data.

    The using_puf flag selects both the input data passed to the model and
    the expected-results file (tbi_puf_expect.txt or tbi_cps_expect.txt) in
    tests_path.  When nonsmall_diffs reports differences, the actual text is
    written to the matching *_actual.txt file in tests_path and a ValueError
    is raised.
    """
    res = run_nth_year_tax_calc_model(year_n=2, start_year=2018,
                                      use_puf_not_cps=using_puf,
                                      use_full_sample=False,
                                      user_mods=USER_MODS,
                                      return_dict=True)
    assert isinstance(res, dict)
    # render every table, in sorted key order, as one multiline string
    pieces = []
    for tbl in sorted(res):
        pieces.append('TABLE {} RESULTS:\n'.format(tbl))
        pieces.append(json.dumps(res[tbl], sort_keys=True, indent=4,
                                 separators=(',', ': ')) + '\n')
    actual_results = ''.join(pieces)
    # load the expected results text for the selected data source
    expect_fname = 'tbi_puf_expect.txt' if using_puf else 'tbi_cps_expect.txt'
    with open(os.path.join(tests_path, expect_fname), 'r') as expect_file:
        expect_results = expect_file.read()
    # nothing more to do when actual and expect results agree
    if not nonsmall_diffs(actual_results.splitlines(True),
                          expect_results.splitlines(True)):
        return
    # swap the trailing 'expect.txt' of the file name for 'actual.txt'
    actual_fname = expect_fname[:-len('expect.txt')] + 'actual.txt'
    with open(os.path.join(tests_path, actual_fname), 'w') as actual_file:
        actual_file.write(actual_results)
    template = (
        'TBI RESULTS DIFFER\n'
        '----------------------------------------------\n'
        '--- NEW RESULTS IN {} FILE ---\n'
        '--- if new OK, copy {} to ---\n'
        '--- {} ---\n'
        '--- and rerun test. ---\n'
        '----------------------------------------------\n'
    )
    raise ValueError(template.format(actual_fname, actual_fname,
                                     expect_fname))
def differences(afilename, efilename):
    """
    Compare actual results in afilename with expected results in efilename.

    When nonsmall_diffs (called with a small value of 0.0) reports no
    differences, afilename is deleted and None is returned; otherwise
    afilename is kept and a ValueError naming both files is raised.
    """
    def read_lines(path):
        # splitlines(True) keeps the EOL character(s) on every line
        with open(path, 'r') as infile:
            return infile.read().splitlines(True)

    actual_lines = read_lines(afilename)
    expect_lines = read_lines(efilename)
    if not nonsmall_diffs(actual_lines, expect_lines, 0.0):
        # results agree, so the actual results file is not needed
        os.remove(afilename)
        return
    afname = os.path.basename(afilename)
    efname = os.path.basename(efilename)
    template = (
        'COMPARE RESULTS DIFFER\n'
        '-------------------------------------------------\n'
        '--- NEW RESULTS IN {} FILE ---\n'
        '--- if new OK, copy {} to ---\n'
        '--- {} ---\n'
        '--- and rerun test. ---\n'
        '-------------------------------------------------\n'
    )
    raise ValueError(template.format(afname, afname, efname))
def differences(afilename, efilename):
    """
    Compare actual results in afilename with expected results in efilename.

    When nonsmall_diffs (called with a small value of 0.0) reports no
    differences, afilename is deleted and None is returned; otherwise
    afilename is kept and a ValueError naming both files is raised.
    """
    def read_lines(path):
        # splitlines(True) keeps the EOL character(s) on every line
        with open(path, 'r') as infile:
            return infile.read().splitlines(True)

    actual_lines = read_lines(afilename)
    expect_lines = read_lines(efilename)
    if not nonsmall_diffs(actual_lines, expect_lines, 0.0):
        # results agree, so the actual results file is not needed
        os.remove(afilename)
        return
    afname = os.path.basename(afilename)
    efname = os.path.basename(efilename)
    template = (
        'COMPARE RESULTS DIFFER\n'
        '-------------------------------------------------\n'
        '--- NEW RESULTS IN {} FILE ---\n'
        '--- if new OK, copy {} to ---\n'
        '--- {} ---\n'
        '--- and rerun test. ---\n'
        '-------------------------------------------------\n'
    )
    raise ValueError(template.format(afname, afname, efname))
def test_agg(tests_path, cps_fullsample):
    """
    Check current-law aggregate taxes computed from cps.csv records.

    First the full-sample diagnostic table is compared with the contents of
    cpscsv_agg_expect.txt; on a mismatch the actual table is written next to
    that file and a ValueError is raised.  Then the combined tax liability of
    a fixed-seed sub-sample is compared, year by year, with the full-sample
    liability, and a ValueError listing every out-of-tolerance year is
    raised if there are any.
    """
    # pylint: disable=too-many-statements,too-many-locals
    num_years = 10
    liability_row = "Combined Liability ($b)"
    # full-sample calculator using a default (current-law) Policy object
    policy = Policy()
    full_calc = Calculator(
        policy=policy,
        records=Records.cps_constructor(data=cps_fullsample))
    # remember the starting year before generating the multi-year table
    first_year = full_calc.current_year
    full_table = full_calc.diagnostic_table(num_years)
    taxes_fullsample = full_table.loc[liability_row]
    table_text = full_table.to_string() + '\n'
    # read expected table, normalizing its trailing whitespace to one EOL
    expect_path = os.path.join(tests_path, 'cpscsv_agg_expect.txt')
    with open(expect_path, 'r') as expect_file:
        expect_text = expect_file.read().rstrip('\n\t ') + '\n'
    if nonsmall_diffs(table_text.splitlines(True),
                      expect_text.splitlines(True)):
        # swap the trailing 'expect.txt' of the path for 'actual.txt'
        actual_path = expect_path[:-len('expect.txt')] + 'actual.txt'
        with open(actual_path, 'w') as actual_file:
            actual_file.write(table_text)
        raise ValueError(
            'CPSCSV AGG RESULTS DIFFER\n'
            '-------------------------------------------------\n'
            '--- NEW RESULTS IN cpscsv_agg_actual.txt FILE ---\n'
            '--- if new OK, copy cpscsv_agg_actual.txt to ---\n'
            '--- cpscsv_agg_expect.txt ---\n'
            '--- and rerun test. ---\n'
            '-------------------------------------------------\n'
        )
    # diagnostic table for an unweighted sub-sample of the records
    rn_seed = 180  # fixed seed keeps the sub-sample the same on every run
    subfrac = 0.03  # sub-sample fraction
    subsample = cps_fullsample.sample(frac=subfrac, random_state=rn_seed)
    sub_calc = Calculator(policy=policy,
                          records=Records.cps_constructor(data=subsample))
    taxes_subsample = sub_calc.diagnostic_table(num_years).loc[liability_row]
    # collect a report for each year whose liabilities are not close enough
    reports = []
    for cyr in range(first_year, first_year + num_years):
        # the first year is given a looser relative tolerance
        reltol = 0.014 if cyr == first_year else 0.006
        sub_tax = taxes_subsample[cyr]
        full_tax = taxes_fullsample[cyr]
        if np.allclose(sub_tax, full_tax, atol=0.0, rtol=reltol):
            continue
        reldiff = (sub_tax / full_tax) - 1.
        reports.append(
            '\nCPSCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'.format(cyr))
        reports.append(
            '\n when subfrac={:.3f}, rtol={:.4f}, seed={}'.format(
                subfrac, reltol, rn_seed))
        reports.append(
            '\n with sub={:.3f}, full={:.3f}, rdiff={:.4f}'.format(
                sub_tax, full_tax, reldiff))
    if reports:
        raise ValueError(''.join(reports))
def test_agg(tests_path):
    """
    Test aggregate taxes using cps.csv file and a 2017_law.json baseline.

    First the full-sample diagnostic table is compared with the contents of
    cpscsv_agg_expect.txt; on a mismatch the actual table is written next to
    that file and a ValueError is raised.  Then the combined tax liability of
    a fixed-seed sub-sample is compared with the full-sample liability for
    every year except the first, and a ValueError listing each year whose
    relative difference exceeds the tolerance is raised if the two are not
    close.
    """
    # pylint: disable=too-many-statements,too-many-locals
    nyrs = 10
    # create a baseline Policy object containing 2017_law.json parameters
    pre_tcja_jrf = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    pre_tcja = Calculator.read_json_param_objects(pre_tcja_jrf, None)
    baseline_policy = Policy()
    baseline_policy.implement_reform(pre_tcja['policy'])
    # create a Records object (rec) containing all cps.csv input records
    rec = Records.cps_constructor()
    # create a Calculator object using baseline policy and cps records
    calc = Calculator(policy=baseline_policy, records=rec)
    calc_start_year = calc.current_year
    # create aggregate diagnostic table (adt) as a Pandas DataFrame object
    adt = calc.diagnostic_table(nyrs)
    taxes_fullsample = adt.loc["Combined Liability ($b)"]
    # convert adt to a string with a trailing EOL character
    actual_results = adt.to_string() + '\n'
    # read expected results from file
    aggres_path = os.path.join(tests_path, 'cpscsv_agg_expect.txt')
    with open(aggres_path, 'r') as expected_file:
        txt = expected_file.read()
    expected_results = txt.rstrip('\n\t ') + '\n'  # cleanup end of file txt
    # ensure actual and expected results have no nonsmall differences
    if sys.version_info.major == 2:
        small = 0.0  # tighter test for Python 2.7
    else:
        small = 0.1  # looser test for Python 3.6
    diffs = nonsmall_diffs(actual_results.splitlines(True),
                           expected_results.splitlines(True), small)
    if diffs:
        new_filename = '{}{}'.format(aggres_path[:-10], 'actual.txt')
        with open(new_filename, 'w') as new_file:
            new_file.write(actual_results)
        msg = 'CPSCSV AGG RESULTS DIFFER\n'
        msg += '-------------------------------------------------\n'
        msg += '--- NEW RESULTS IN cpscsv_agg_actual.txt FILE ---\n'
        msg += '--- if new OK, copy cpscsv_agg_actual.txt to ---\n'
        msg += '--- cpscsv_agg_expect.txt ---\n'
        msg += '--- and rerun test. ---\n'
        msg += '-------------------------------------------------\n'
        raise ValueError(msg)
    # create aggregate diagnostic table using unweighted sub-sample of records
    cps_filepath = os.path.join(tests_path, '..', 'cps.csv.gz')
    fullsample = pd.read_csv(cps_filepath)
    rn_seed = 180  # to ensure sub-sample is always the same
    subfrac = 0.03  # sub-sample fraction
    subsample = fullsample.sample(frac=subfrac, random_state=rn_seed)
    rec_subsample = Records(data=subsample,
                            gfactors=Growfactors(),
                            weights=Records.CPS_WEIGHTS_FILENAME,
                            adjust_ratios=Records.CPS_RATIOS_FILENAME,
                            start_year=Records.CPSCSV_YEAR)
    calc_subsample = Calculator(policy=baseline_policy, records=rec_subsample)
    adt_subsample = calc_subsample.diagnostic_table(nyrs)
    # compare combined tax liability from full and sub samples for each year
    taxes_subsample = adt_subsample.loc["Combined Liability ($b)"]
    reltol = 0.01  # maximum allowed relative difference in tax liability
    # TODO: skip first year because of BUG in cps_weights.csv file
    skipped_years = 1  # TODO: eliminate code
    taxes_subsample = taxes_subsample[skipped_years:]  # TODO: eliminate code
    taxes_fullsample = taxes_fullsample[skipped_years:]  # TODO: eliminate code
    if not np.allclose(taxes_subsample, taxes_fullsample,
                       atol=0.0, rtol=reltol):
        msg = 'CPSCSV AGG RESULTS DIFFER IN SUB-SAMPLE AND FULL-SAMPLE\n'
        msg += 'WHEN subfrac={:.3f}, rtol={:.4f}, seed={}\n'.format(
            subfrac, reltol, rn_seed)
        it_sub = np.nditer(taxes_subsample, flags=['f_index'])
        it_all = np.nditer(taxes_fullsample, flags=['f_index'])
        while not it_sub.finished:
            # position zero in the sliced data is the first non-skipped year,
            # so the skipped years must be added to report the right year
            cyr = it_sub.index + calc_start_year + skipped_years
            tax_sub = float(it_sub[0])
            tax_all = float(it_all[0])
            reldiff = abs(tax_sub - tax_all) / abs(tax_all)
            if reldiff > reltol:
                msgstr = ' year,sub,full,reldiff= {}\t{:.2f}\t{:.2f}\t{:.4f}\n'
                msg += msgstr.format(cyr, tax_sub, tax_all, reldiff)
            it_sub.iternext()
            it_all.iternext()
        raise ValueError(msg)
def test_agg(tests_path, puf_fullsample):
    """
    Check aggregate taxes computed from puf.csv under a 2017_law.json
    baseline, using the full sample and a small sub-sample of puf.csv.

    First the full-sample diagnostic table is compared with the contents of
    pufcsv_agg_expect.txt; on a mismatch the actual table is written next to
    that file and a ValueError is raised.  Then the combined tax liability of
    a fixed-seed sub-sample is compared, year by year, with the full-sample
    liability, and a ValueError listing every out-of-tolerance year is
    raised if there are any.
    """
    # pylint: disable=too-many-locals,too-many-statements
    num_years = 10
    liability_row = "Combined Liability ($b)"
    # baseline is a default Policy with the 2017_law.json reform implemented
    reform_path = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    reform_params = Calculator.read_json_param_objects(reform_path, None)
    policy = Policy()
    policy.implement_reform(reform_params['policy'])
    # full-sample calculator; remember its starting year before generating
    # the multi-year table
    full_calc = Calculator(policy=policy,
                           records=Records(data=puf_fullsample))
    first_year = full_calc.current_year
    full_table = full_calc.diagnostic_table(num_years)
    taxes_fullsample = full_table.loc[liability_row]
    table_text = full_table.to_string(float_format='%8.1f') + '\n'
    # read expected table, normalizing its trailing whitespace to one EOL
    expect_path = os.path.join(tests_path, 'pufcsv_agg_expect.txt')
    with open(expect_path, 'r') as expect_file:
        expect_text = expect_file.read().rstrip('\n\t ') + '\n'
    if nonsmall_diffs(table_text.splitlines(True),
                      expect_text.splitlines(True)):
        # swap the trailing 'expect.txt' of the path for 'actual.txt'
        actual_path = expect_path[:-len('expect.txt')] + 'actual.txt'
        with open(actual_path, 'w') as actual_file:
            actual_file.write(table_text)
        raise ValueError(
            'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n'
            '-------------------------------------------------\n'
            '--- NEW RESULTS IN pufcsv_agg_actual.txt FILE ---\n'
            '--- if new OK, copy pufcsv_agg_actual.txt to ---\n'
            '--- pufcsv_agg_expect.txt ---\n'
            '--- and rerun test. ---\n'
            '-------------------------------------------------\n'
        )
    # diagnostic table for an unweighted sub-sample of the records
    rn_seed = 2222  # fixed seed keeps the sub-sample the same on every run
    subfrac = 0.05  # sub-sample fraction
    subsample = puf_fullsample.sample(frac=subfrac, random_state=rn_seed)
    sub_calc = Calculator(policy=policy, records=Records(data=subsample))
    taxes_subsample = sub_calc.diagnostic_table(num_years).loc[liability_row]
    # collect a report for each year whose liabilities are not close enough
    reltol = 0.01  # maximum allowed relative difference in tax liability
    reports = []
    for cyr in range(first_year, first_year + num_years):
        sub_tax = taxes_subsample[cyr]
        full_tax = taxes_fullsample[cyr]
        if np.allclose(sub_tax, full_tax, atol=0.0, rtol=reltol):
            continue
        reldiff = (sub_tax / full_tax) - 1.
        reports.append(
            '\nPUFCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'.format(cyr))
        reports.append(
            '\n when subfrac={:.3f}, rtol={:.4f}, seed={}'.format(
                subfrac, reltol, rn_seed))
        reports.append(
            '\n with sub={:.3f}, full={:.3f}, rdiff={:.4f}'.format(
                sub_tax, full_tax, reldiff))
    if reports:
        raise ValueError(''.join(reports))
def test_agg(tests_path, puf_fullsample):
    """
    Check aggregate taxes computed from puf.csv under a 2017_law.json
    baseline, using the full sample and a small sub-sample of puf.csv.

    First the full-sample diagnostic table is compared with the contents of
    pufcsv_agg_expect.txt; on a mismatch the actual table is written next to
    that file and a ValueError is raised.  Then the combined tax liability of
    a fixed-seed sub-sample is compared, year by year, with the full-sample
    liability, and a ValueError listing every out-of-tolerance year is
    raised if there are any.
    """
    # pylint: disable=too-many-locals,too-many-statements
    num_years = 10
    liability_row = "Combined Liability ($b)"
    # baseline is a default Policy with the 2017_law.json reform implemented
    reform_path = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    reform_params = Calculator.read_json_param_objects(reform_path, None)
    policy = Policy()
    policy.implement_reform(reform_params['policy'])
    # full-sample calculator; remember its starting year before generating
    # the multi-year table
    full_calc = Calculator(policy=policy,
                           records=Records(data=puf_fullsample))
    first_year = full_calc.current_year
    full_table = full_calc.diagnostic_table(num_years)
    taxes_fullsample = full_table.loc[liability_row]
    table_text = full_table.to_string(float_format='%8.1f') + '\n'
    # read expected table, normalizing its trailing whitespace to one EOL
    expect_path = os.path.join(tests_path, 'pufcsv_agg_expect.txt')
    with open(expect_path, 'r') as expect_file:
        expect_text = expect_file.read().rstrip('\n\t ') + '\n'
    if nonsmall_diffs(table_text.splitlines(True),
                      expect_text.splitlines(True)):
        # swap the trailing 'expect.txt' of the path for 'actual.txt'
        actual_path = expect_path[:-len('expect.txt')] + 'actual.txt'
        with open(actual_path, 'w') as actual_file:
            actual_file.write(table_text)
        raise ValueError(
            'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n'
            '-------------------------------------------------\n'
            '--- NEW RESULTS IN pufcsv_agg_actual.txt FILE ---\n'
            '--- if new OK, copy pufcsv_agg_actual.txt to ---\n'
            '--- pufcsv_agg_expect.txt ---\n'
            '--- and rerun test. ---\n'
            '-------------------------------------------------\n'
        )
    # diagnostic table for an unweighted sub-sample of the records
    rn_seed = 2222  # fixed seed keeps the sub-sample the same on every run
    subfrac = 0.05  # sub-sample fraction
    subsample = puf_fullsample.sample(frac=subfrac, random_state=rn_seed)
    sub_calc = Calculator(policy=policy, records=Records(data=subsample))
    taxes_subsample = sub_calc.diagnostic_table(num_years).loc[liability_row]
    # collect a report for each year whose liabilities are not close enough
    reltol = 0.01  # maximum allowed relative difference in tax liability
    reports = []
    for cyr in range(first_year, first_year + num_years):
        sub_tax = taxes_subsample[cyr]
        full_tax = taxes_fullsample[cyr]
        if np.allclose(sub_tax, full_tax, atol=0.0, rtol=reltol):
            continue
        reldiff = (sub_tax / full_tax) - 1.
        reports.append(
            '\nPUFCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'.format(cyr))
        reports.append(
            '\n when subfrac={:.3f}, rtol={:.4f}, seed={}'.format(
                subfrac, reltol, rn_seed))
        reports.append(
            '\n with sub={:.3f}, full={:.3f}, rdiff={:.4f}'.format(
                sub_tax, full_tax, reldiff))
    if reports:
        raise ValueError(''.join(reports))
def test_agg(tests_path, cps_fullsample):
    """
    Check aggregate taxes computed from cps.csv records under a
    2017_law.json baseline.

    First the full-sample diagnostic table is compared with the contents of
    cpscsv_agg_expect.txt; on a mismatch the actual table is written next to
    that file and a ValueError is raised.  Then the combined tax liability of
    a fixed-seed sub-sample is compared, year by year, with the full-sample
    liability, and a ValueError listing every out-of-tolerance year is
    raised if there are any.
    """
    # pylint: disable=too-many-statements,too-many-locals
    num_years = 10
    liability_row = "Combined Liability ($b)"
    # baseline is a default Policy with the 2017_law.json reform implemented
    reform_path = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    reform_params = Calculator.read_json_param_objects(reform_path, None)
    policy = Policy()
    policy.implement_reform(reform_params['policy'])
    # full-sample calculator; remember its starting year before generating
    # the multi-year table
    full_calc = Calculator(
        policy=policy,
        records=Records.cps_constructor(data=cps_fullsample))
    first_year = full_calc.current_year
    full_table = full_calc.diagnostic_table(num_years)
    taxes_fullsample = full_table.loc[liability_row]
    table_text = full_table.to_string(float_format='%8.1f') + '\n'
    # read expected table, normalizing its trailing whitespace to one EOL
    expect_path = os.path.join(tests_path, 'cpscsv_agg_expect.txt')
    with open(expect_path, 'r') as expect_file:
        expect_text = expect_file.read().rstrip('\n\t ') + '\n'
    if nonsmall_diffs(table_text.splitlines(True),
                      expect_text.splitlines(True)):
        # swap the trailing 'expect.txt' of the path for 'actual.txt'
        actual_path = expect_path[:-len('expect.txt')] + 'actual.txt'
        with open(actual_path, 'w') as actual_file:
            actual_file.write(table_text)
        raise ValueError(
            'CPSCSV AGG RESULTS DIFFER\n'
            '-------------------------------------------------\n'
            '--- NEW RESULTS IN cpscsv_agg_actual.txt FILE ---\n'
            '--- if new OK, copy cpscsv_agg_actual.txt to ---\n'
            '--- cpscsv_agg_expect.txt ---\n'
            '--- and rerun test. ---\n'
            '-------------------------------------------------\n'
        )
    # diagnostic table for an unweighted sub-sample of the records
    rn_seed = 180  # fixed seed keeps the sub-sample the same on every run
    subfrac = 0.03  # sub-sample fraction
    subsample = cps_fullsample.sample(frac=subfrac, random_state=rn_seed)
    sub_calc = Calculator(policy=policy,
                          records=Records.cps_constructor(data=subsample))
    taxes_subsample = sub_calc.diagnostic_table(num_years).loc[liability_row]
    # collect a report for each year whose liabilities are not close enough
    reports = []
    for cyr in range(first_year, first_year + num_years):
        # the first year is given a looser relative tolerance
        reltol = 0.014 if cyr == first_year else 0.006
        sub_tax = taxes_subsample[cyr]
        full_tax = taxes_fullsample[cyr]
        if np.allclose(sub_tax, full_tax, atol=0.0, rtol=reltol):
            continue
        reldiff = (sub_tax / full_tax) - 1.
        reports.append(
            '\nCPSCSV AGG SUB-vs-FULL RESULTS DIFFER IN {}'.format(cyr))
        reports.append(
            '\n when subfrac={:.3f}, rtol={:.4f}, seed={}'.format(
                subfrac, reltol, rn_seed))
        reports.append(
            '\n with sub={:.3f}, full={:.3f}, rdiff={:.4f}'.format(
                sub_tax, full_tax, reldiff))
    if reports:
        raise ValueError(''.join(reports))
def test_mtr(tests_path, puf_path):
    """
    Check marginal tax rate (mtr) histograms computed from puf.csv records
    under a default Policy set to MTR_TAX_YEAR.

    For each variable in Calculator.MTR_VALID_VARIABLES the payroll-tax and
    income-tax marginal tax rates are computed and their histogram bin counts
    appended to a results string, which is then compared with the contents
    of pufcsv_mtr_expect.txt.  On a mismatch the actual results are written
    next to that file and a ValueError is raised.
    """
    # pylint: disable=too-many-locals,too-many-statements
    assert len(PTAX_MTR_BIN_EDGES) == len(ITAX_MTR_BIN_EDGES)
    # accumulate pieces of the actual results text
    out = [
        ('MTR computed using NEGATIVE finite_diff ' if MTR_NEG_DIFF else
         'MTR computed using POSITIVE finite_diff '),
        'for tax year {}\n'.format(MTR_TAX_YEAR),
    ]
    # calculator using a default Policy at MTR_TAX_YEAR and puf.csv records
    policy = Policy()
    policy.set_year(MTR_TAX_YEAR)
    puf = Records(data=puf_path)
    recid = puf.RECID  # pylint: disable=no-member
    calc = Calculator(policy=policy, records=puf)
    arr = calc.array  # shorthand for reading calculator variables
    out.append('{} = {}\n'.format('Total number of data records',
                                  puf.array_length))
    out.append('PTAX mtr histogram bin edges:\n')
    out.append(' {}\n'.format(PTAX_MTR_BIN_EDGES))
    out.append('ITAX mtr histogram bin edges:\n')
    out.append(' {}\n'.format(ITAX_MTR_BIN_EDGES))
    variable_header = 'PTAX and ITAX mtr histogram bin counts for'
    # histogram the marginal tax rates for each valid mtr variable
    for var_str in Calculator.MTR_VALID_VARIABLES:
        zero_out = (var_str == 'e01400')
        mtr_ptax, mtr_itax, _ = calc.mtr(
            variable_str=var_str,
            negative_finite_diff=MTR_NEG_DIFF,
            zero_out_calculated_vars=zero_out,
            wrt_full_compensation=False)
        if zero_out:
            # check that calculated variables are consistent
            assert np.allclose(arr('iitax') + arr('payrolltax'),
                               arr('combined'))
            assert np.allclose(
                arr('ptax_was') + arr('setax') + arr('ptax_amc'),
                arr('payrolltax'))
            assert np.allclose(arr('c21060') - arr('c21040'), arr('c04470'))
            assert np.allclose(arr('taxbc') + arr('c09600'), arr('c05800'))
            assert np.allclose(
                arr('c05800') + arr('othertaxes') - arr('c07100'),
                arr('c09200'))
            assert np.allclose(arr('c09200') - arr('refund'), arr('iitax'))
        if var_str == 'e00200s':
            # only MARS==2 filing units have valid MTR values
            is_mars2 = arr('MARS') == 2
            mtr_ptax = mtr_ptax[is_mars2]
            mtr_itax = mtr_itax[is_mars2]
        out.append('{} {}:\n'.format(variable_header, var_str))
        out.append(mtr_bin_counts(mtr_ptax, PTAX_MTR_BIN_EDGES, recid))
        out.append(mtr_bin_counts(mtr_itax, ITAX_MTR_BIN_EDGES, recid))
    res = ''.join(out)
    # read expected results, normalizing trailing whitespace to one EOL
    expect_path = os.path.join(tests_path, 'pufcsv_mtr_expect.txt')
    with open(expect_path, 'r') as expect_file:
        expect_text = expect_file.read().rstrip('\n\t ') + '\n'
    if not nonsmall_diffs(res.splitlines(True),
                          expect_text.splitlines(True)):
        return
    # swap the trailing 'expect.txt' of the path for 'actual.txt'
    actual_path = expect_path[:-len('expect.txt')] + 'actual.txt'
    with open(actual_path, 'w') as actual_file:
        actual_file.write(res)
    raise ValueError(
        'PUFCSV MTR RESULTS DIFFER\n'
        '-------------------------------------------------\n'
        '--- NEW RESULTS IN pufcsv_mtr_actual.txt FILE ---\n'
        '--- if new OK, copy pufcsv_mtr_actual.txt to ---\n'
        '--- pufcsv_mtr_expect.txt ---\n'
        '--- and rerun test. ---\n'
        '-------------------------------------------------\n'
    )
def test_agg(tests_path, puf_fullsample):
    """
    Check aggregate taxes computed from puf.csv under a 2017_law.json
    baseline, using the full sample and a small sub-sample of puf.csv.

    First the full-sample diagnostic table is compared with the contents of
    pufcsv_agg_expect.txt, using a small value that depends on the major
    Python version; on a mismatch the actual table is written next to that
    file and a ValueError is raised.  Then the combined tax liability of a
    fixed-seed sub-sample is compared with the full-sample liability, and a
    ValueError listing each year whose relative difference exceeds the
    tolerance is raised if the two are not close.
    """
    # pylint: disable=too-many-locals,too-many-statements
    num_years = 10
    liability_row = "Combined Liability ($b)"
    # baseline is a default Policy with the 2017_law.json reform implemented
    reform_path = os.path.join(tests_path, '..', 'reforms', '2017_law.json')
    reform_params = Calculator.read_json_param_objects(reform_path, None)
    policy = Policy()
    policy.implement_reform(reform_params['policy'])
    # full-sample calculator; remember its starting year before generating
    # the multi-year table
    full_calc = Calculator(policy=policy,
                           records=Records(data=puf_fullsample))
    first_year = full_calc.current_year
    full_table = full_calc.diagnostic_table(num_years)
    taxes_fullsample = full_table.loc[liability_row]
    table_text = full_table.to_string() + '\n'
    # read expected table, normalizing its trailing whitespace to one EOL
    expect_path = os.path.join(tests_path, 'pufcsv_agg_expect.txt')
    with open(expect_path, 'r') as expect_file:
        expect_text = expect_file.read().rstrip('\n\t ') + '\n'
    # tighter test for Python 2.7 and looser test for Python 3.6
    small = 0.0 if sys.version_info.major == 2 else 0.1
    if nonsmall_diffs(table_text.splitlines(True),
                      expect_text.splitlines(True), small):
        # swap the trailing 'expect.txt' of the path for 'actual.txt'
        actual_path = expect_path[:-len('expect.txt')] + 'actual.txt'
        with open(actual_path, 'w') as actual_file:
            actual_file.write(table_text)
        raise ValueError(
            'PUFCSV AGG RESULTS DIFFER FOR FULL-SAMPLE\n'
            '-------------------------------------------------\n'
            '--- NEW RESULTS IN pufcsv_agg_actual.txt FILE ---\n'
            '--- if new OK, copy pufcsv_agg_actual.txt to ---\n'
            '--- pufcsv_agg_expect.txt ---\n'
            '--- and rerun test. ---\n'
            '-------------------------------------------------\n'
        )
    # diagnostic table for an unweighted sub-sample of the records
    rn_seed = 180  # fixed seed keeps the sub-sample the same on every run
    subfrac = 0.05  # sub-sample fraction
    subsample = puf_fullsample.sample(frac=subfrac, random_state=rn_seed)
    sub_calc = Calculator(policy=policy, records=Records(data=subsample))
    taxes_subsample = sub_calc.diagnostic_table(num_years).loc[liability_row]
    # compare combined tax liability from full and sub samples
    reltol = 0.01  # maximum allowed relative difference in tax liability
    if np.allclose(taxes_subsample, taxes_fullsample,
                   atol=0.0, rtol=reltol):
        return
    reports = [
        'PUFCSV AGG RESULTS DIFFER IN SUB-SAMPLE AND FULL-SAMPLE\n',
        'WHEN subfrac={:.3f}, rtol={:.4f}, seed={}\n'.format(
            subfrac, reltol, rn_seed),
    ]
    row_format = ' year,sub,full,reldiff= {}\t{:.2f}\t{:.2f}\t{:.4f}\n'
    # step through both series in parallel, reporting out-of-tolerance years
    sub_iter = np.nditer(taxes_subsample, flags=['f_index'])
    full_iter = np.nditer(taxes_fullsample, flags=['f_index'])
    while not sub_iter.finished:
        cyr = sub_iter.index + first_year
        tax_sub = float(sub_iter[0])
        tax_all = float(full_iter[0])
        reldiff = abs(tax_sub - tax_all) / abs(tax_all)
        if reldiff > reltol:
            reports.append(row_format.format(cyr, tax_sub, tax_all, reldiff))
        sub_iter.iternext()
        full_iter.iternext()
    raise ValueError(''.join(reports))
def test_mtr(tests_path, puf_path):
    """
    Check Tax-Calculator marginal tax rates under current-law policy.

    For each variable in Calculator.MTR_VALID_VARIABLES, payroll-tax and
    income-tax marginal tax rates are computed from puf.csv records and
    summarized as histogram bin counts in a results string.  That string
    is compared with the contents of pufcsv_mtr_expect.txt; if they
    differ, the actual results are written to pufcsv_mtr_actual.txt and
    ValueError is raised.
    """
    # pylint: disable=too-many-locals,too-many-statements
    assert len(PTAX_MTR_BIN_EDGES) == len(ITAX_MTR_BIN_EDGES)
    # pieces of the actual results string are collected and joined later
    direction = 'NEGATIVE' if MTR_NEG_DIFF else 'POSITIVE'
    pieces = [
        'MTR computed using {} finite_diff '.format(direction),
        'for tax year {}\n'.format(MTR_TAX_YEAR),
    ]
    # Policy object with default parameters set to the MTR tax year
    clp = Policy()
    clp.set_year(MTR_TAX_YEAR)
    # Records object holding the puf.csv input records
    puf = Records(data=puf_path)
    recid = puf.RECID  # pylint: disable=no-member
    # Calculator object using clp policy and puf records
    calc = Calculator(policy=clp, records=puf)
    pieces.append('{} = {}\n'.format('Total number of data records',
                                     puf.array_length))
    pieces.append('PTAX mtr histogram bin edges:\n')
    pieces.append(' {}\n'.format(PTAX_MTR_BIN_EDGES))
    pieces.append('ITAX mtr histogram bin edges:\n')
    pieces.append(' {}\n'.format(ITAX_MTR_BIN_EDGES))
    variable_header = 'PTAX and ITAX mtr histogram bin counts for'
    arr = calc.array
    # histogram the marginal tax rates for each valid mtr variable
    for var_str in Calculator.MTR_VALID_VARIABLES:
        zero_out = (var_str == 'e01400')
        mtr_ptax, mtr_itax, _ = calc.mtr(
            variable_str=var_str,
            negative_finite_diff=MTR_NEG_DIFF,
            zero_out_calculated_vars=zero_out,
            wrt_full_compensation=False
        )
        if zero_out:
            # calculated variables must still satisfy their identities
            assert np.allclose(arr('iitax') + arr('payrolltax'),
                               arr('combined'))
            assert np.allclose(
                arr('ptax_was') + arr('setax') + arr('ptax_amc'),
                arr('payrolltax')
            )
            assert np.allclose(arr('c21060') - arr('c21040'),
                               arr('c04470'))
            assert np.allclose(arr('taxbc') + arr('c09600'),
                               arr('c05800'))
            assert np.allclose(
                arr('c05800') + arr('othertaxes') - arr('c07100'),
                arr('c09200')
            )
            assert np.allclose(arr('c09200') - arr('refund'),
                               arr('iitax'))
        if var_str == 'e00200s':
            # only MARS==2 filing units have valid MTR values
            mars_is_two = arr('MARS') == 2
            mtr_ptax = mtr_ptax[mars_is_two]
            mtr_itax = mtr_itax[mars_is_two]
        pieces.append('{} {}:\n'.format(variable_header, var_str))
        pieces.append(mtr_bin_counts(mtr_ptax, PTAX_MTR_BIN_EDGES, recid))
        pieces.append(mtr_bin_counts(mtr_itax, ITAX_MTR_BIN_EDGES, recid))
    res = ''.join(pieces)
    # read expected results and normalize their trailing whitespace
    mtrres_path = os.path.join(tests_path, 'pufcsv_mtr_expect.txt')
    with open(mtrres_path, 'r') as expected_file:
        expected_text = expected_file.read()
    expected_results = expected_text.rstrip('\n\t ') + '\n'
    if not nonsmall_diffs(res.splitlines(True),
                          expected_results.splitlines(True)):
        return
    # save actual results next to the expected-results file
    new_filename = mtrres_path[:-10] + 'actual.txt'
    with open(new_filename, 'w') as new_file:
        new_file.write(res)
    raise ValueError(''.join((
        'PUFCSV MTR RESULTS DIFFER\n',
        '-------------------------------------------------\n',
        '--- NEW RESULTS IN pufcsv_mtr_actual.txt FILE ---\n',
        '--- if new OK, copy pufcsv_mtr_actual.txt to ---\n',
        '--- pufcsv_mtr_expect.txt ---\n',
        '--- and rerun test. ---\n',
        '-------------------------------------------------\n',
    )))