示例#1
0
def test_add_quantile_bins():
    """
    Exercise add_quantile_bins with 100 bins on the DATA sample.

    With weight_by_income_measure=False every value found in the 'bins'
    column must be one of the integers 1 through 100.  With
    weight_by_income_measure=True only the presence of a 'bins' column
    is checked.
    """
    frame = pd.DataFrame(data=DATA,
                         columns=['expanded_income', 's006', 'label'])
    # first pass: weight_by_income_measure=False, labels must be in 1..100
    valid_labels = set(range(1, 101))
    unweighted = add_quantile_bins(frame, 'expanded_income', 100,
                                   weight_by_income_measure=False)
    assert all(lab in valid_labels for lab in unweighted['bins'].unique())
    # second pass: weight_by_income_measure=True, only check column exists
    weighted = add_quantile_bins(frame, 'expanded_income', 100,
                                 weight_by_income_measure=True)
    assert 'bins' in weighted
示例#2
0
 def fuzz(df1, df2, bin_type, imeasure, suffix, cols_to_fuzz):
     """
     Add fuzzed versions of the cols_to_fuzz columns to df2.

     A 'bins' column is added to df2 according to bin_type:
       'dec' : add_quantile_bins on imeasure with 10 bins,
       'bin' : add_income_bins on imeasure using WEBAPP_INCOME_BINS,
       'agg' : add_quantile_bins on imeasure with 1 bin.
     Within each bin, chooser is applied to the 'mask' column and the
     result is stored in the 'nofuzz' column.  For each col in
     cols_to_fuzz a column named col+suffix is written to df2 holding
     the df2 (post-reform) value where nofuzz is one and the df1
     (pre-reform) value where nofuzz is zero.
     """
     # pylint: disable=too-many-arguments
     assert bin_type in ('dec', 'bin', 'agg')
     if bin_type == 'bin':
         df2 = add_income_bins(df2, imeasure, bins=WEBAPP_INCOME_BINS)
     else:
         # ten quantile bins for 'dec'; otherwise ('agg') a single bin
         num_bins = 10 if bin_type == 'dec' else 1
         df2 = add_quantile_bins(df2, imeasure, num_bins)
     df2['nofuzz'] = df2.groupby('bins')['mask'].transform(chooser)
     for col in cols_to_fuzz:
         kept_post = df2[col] * df2['nofuzz']
         kept_pre = df1[col] * df2['nofuzz']
         # same arithmetic order as post*nofuzz - pre*nofuzz + pre
         df2[col + suffix] = kept_post - kept_pre + df1[col]
示例#3
0
def test_add_quantile_bins():
    """
    Exercise add_quantile_bins with default and with custom bin labels.

    Three calls are made on the DATA sample: 100 bins with
    weight_by_income_measure=False (labels must be integers 1 through
    100), 100 bins with weight_by_income_measure=True (only the 'bins'
    column is checked for), and 10 bins with caller-supplied labels
    (every label found must be one of those supplied).
    """
    frame = pd.DataFrame(data=DATA,
                         columns=['expanded_income', 's006', 'label'])
    # default labels with weight_by_income_measure=False
    valid_labels = set(range(1, 101))
    unweighted = add_quantile_bins(frame, 'expanded_income', 100,
                                   weight_by_income_measure=False)
    assert all(lab in valid_labels for lab in unweighted['bins'].unique())
    # default labels with weight_by_income_measure=True
    weighted = add_quantile_bins(frame, 'expanded_income', 100,
                                 weight_by_income_measure=True)
    assert 'bins' in weighted
    # custom labels
    custom_labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
    labeled = add_quantile_bins(frame, 'expanded_income', 10,
                                labels=custom_labels)
    assert 'bins' in labeled
    assert all(lab in custom_labels for lab in labeled['bins'].unique())
示例#4
0
 def create(df1, df2, bin_type, imeasure, suffix, cols_to_fuzz, do_fuzzing):
     """
     Add col+suffix columns to df2 for each col in cols_to_fuzz.

     A 'bins' column is added to df2 according to bin_type:
       'dec' : add_quantile_bins on imeasure with 10 bins,
       'bin' : add_income_bins on imeasure using STANDARD_INCOME_BINS,
       'agg' : add_quantile_bins on imeasure with 1 bin.
     When do_fuzzing is true, the 'nofuzz' column is the result of
     applying chooser to the 'mask' column within each bin; otherwise
     'nofuzz' is one for every record, so no results are fuzzed.
     Each new col+suffix column holds the df2 (post-reform) value where
     nofuzz is one and the df1 (pre-reform) value where nofuzz is zero.
     """
     # pylint: disable=too-many-arguments
     assert bin_type in ('dec', 'bin', 'agg')
     if bin_type == 'bin':
         df2 = add_income_bins(df2, imeasure, bins=STANDARD_INCOME_BINS)
     else:
         # ten quantile bins for 'dec'; otherwise ('agg') a single bin
         num_bins = 10 if bin_type == 'dec' else 1
         df2 = add_quantile_bins(df2, imeasure, num_bins)
     # grouping is built unconditionally, as in the non-fuzzing path too
     grouped = df2.groupby('bins')
     if not do_fuzzing:
         # never do any results fuzzing
         df2['nofuzz'] = np.ones(df2.shape[0], dtype=np.int8)
     else:
         df2['nofuzz'] = grouped['mask'].transform(chooser)
     for col in cols_to_fuzz:
         kept_post = df2[col] * df2['nofuzz']
         kept_pre = df1[col] * df2['nofuzz']
         # same arithmetic order as post*nofuzz - pre*nofuzz + pre
         df2[col + suffix] = kept_post - kept_pre + df1[col]
示例#5
0
 def write_decile_table(dfx, tfile, tkind='Totals'):
     """
     Write to tfile the tkind decile table computed from the dfx DataFrame.

     dfx is split into 10 bins of 'expanded_income' by add_quantile_bins
     (weight_by_income_measure=False).  For each bin the table shows
     unweighted_sum of 's006' scaled by 1e-6 and weighted_sum of
     'expanded_income', 'iitax', 'payrolltax', 'lumpsum_tax' and
     'combined', each scaled by 1e-9.  Output is a title line, two
     header lines, ten rows labeled 0 through 9, and a final row
     labeled 'A' holding the sum of each column over all bins.
     """
     binned = add_quantile_bins(dfx, 'expanded_income', 10,
                                weight_by_income_measure=False)
     by_decile = binned.groupby('bins', as_index=False)
     # (per-bin series, scaling factor) pairs in left-to-right column order
     columns = [
         (by_decile.apply(unweighted_sum, 's006'), 1e-6),
         (by_decile.apply(weighted_sum, 'expanded_income'), 1e-9),
         (by_decile.apply(weighted_sum, 'iitax'), 1e-9),
         (by_decile.apply(weighted_sum, 'payrolltax'), 1e-9),
         (by_decile.apply(weighted_sum, 'lumpsum_tax'), 1e-9),
         (by_decile.apply(weighted_sum, 'combined'), 1e-9),
     ]
     # write decile table to text file: title, then two header lines
     title = 'Weighted Tax {} by Baseline Expanded-Income Decile\n'
     tfile.write(title.format(tkind))
     header_fmt = '{}{}{}{}{}{}\n'
     names = ('    Returns', '    ExpInc', '    IncTax',
              '    PayTax', '     LSTax', '    AllTax')
     units = ('       (#m)', '      ($b)', '      ($b)',
              '      ($b)', '      ($b)', '      ($b)')
     tfile.write(header_fmt.format(*names))
     tfile.write(header_fmt.format(*units))
     # one row per bin, then the all-bins totals row
     value_fmt = '{:9.2f}{:10.1f}{:10.1f}{:10.1f}{:10.1f}{:10.1f}\n'
     for decile in range(10):
         scaled = [series[decile] * scale for series, scale in columns]
         tfile.write('{:2d}'.format(decile) + value_fmt.format(*scaled))
     totals = [series.sum() * scale for series, scale in columns]
     tfile.write(' A' + value_fmt.format(*totals))