def tax(flat_tax, total_type="person"):
    """Compute poverty, inequality and revenue metrics for a flat tax.

    Each SPM unit is taxed at `flat_tax` percent of its `spmu_agi`. The
    revenue raised beyond the current `fica_fedtax_ac` total is divided by
    the `person` total to give a per-person UBI, which is added to each
    unit's pre-tax resources.

    Side effect: writes the `new_tax` and `new_spm_resources` columns on the
    module-level `spmu` frame.

    Args:
        flat_tax: Percentage tax rate (0-100).
        total_type: Row of `totals` that supplies the population and the
            current tax liability. Either "person" or "spmu". Defaults to
            "person".

    Returns:
        A pd.Series holding the simulated poverty_rate, poverty_gap, gini
        and percent_better_off, their changes from the status quo as
        computed by `chg` (change_poverty_rate, change_poverty_gap,
        change_gini), plus change_revenue and ubi.
    """
    flat_tax /= 100

    # Tax every SPM unit, then size the UBI from the extra revenue raised.
    spmu["new_tax"] = spmu.spmu_agi * flat_tax
    revenue = mdf.weighted_sum(spmu, "new_tax", "spm_weight")
    baseline = totals.loc[total_type]
    change_revenue = revenue - baseline.fica_fedtax_ac
    ubi = change_revenue / baseline.person
    unit_ubi = ubi * spmu.spmu_total_people
    spmu["new_spm_resources"] = (
        spmu.spm_resources_before_tax + unit_ubi - spmu.new_tax
    )

    # Bring the unit-level result down to person level.
    unit_cols = SPM_COLS + ["new_spm_resources"]
    people = person.merge(spmu[unit_cols], on=SPM_COLS)
    people["new_spm_resources_pp"] = (
        people.new_spm_resources / people.spmu_total_people
    )

    # Poverty rate: weighted share of people below their threshold.
    people["new_poor"] = people.new_spm_resources < people.spm_povthreshold
    poverty_rate = mdf.weighted_mean(people, "new_poor", "marsupwt")

    # Poverty gap: weighted total shortfall across SPM units.
    shortfall = np.maximum(spmu.spm_povthreshold - spmu.new_spm_resources, 0)
    poverty_gap = (shortfall * spmu.spm_weight).sum()

    # Gini of per-person resources.
    gini = mdf.gini(people, "new_spm_resources_pp", w="marsupwt")

    # Winners: people whose unit ends up with more than it had.
    people["better_off"] = people.new_spm_resources > people.spm_resources
    percent_better_off = mdf.weighted_mean(people, "better_off", "marsupwt")

    metrics = {
        "poverty_rate": poverty_rate,
        "poverty_gap": poverty_gap,
        "gini": gini,
        "percent_better_off": percent_better_off,
        "change_poverty_rate": chg(poverty_rate, initial_poverty_rate),
        "change_poverty_gap": chg(poverty_gap, initial_poverty_gap),
        "change_gini": chg(gini, initial_gini),
        "change_revenue": change_revenue,
        "ubi": ubi,
    }
    return pd.Series(metrics)
def pv_rate(column):
    """Return the weighted poverty rate, in percent, for one subgroup.

    Args:
        column: Name of a column of `target_persons` used as a row mask
            (presumably boolean); only the selected rows are included.

    Returns:
        The `asecwt`-weighted mean of the `poor` column over the selected
        rows, multiplied by 100.
    """
    subgroup = target_persons[target_persons[column]]
    share_poor = mdf.weighted_mean(subgroup, "poor", "asecwt")
    return share_poor * 100
def pov(race, monthly_ubi):
    """Simulate a UBI funded by a flat tax and report poverty for one race.

    The annual cost of the UBI is divided by `total_agi` to get the tax
    rate. Each SPM unit's resources are reduced by that rate times its
    `spm_agi_pos` and increased by twelve monthly payments per member.

    Side effect: writes the `new_spm_resources` column on the module-level
    `spm` frame.

    Args:
        race: Race label. Its lower-cased form names the person-level mask
            column and, prefixed with "spm_", the SPM-unit-level column;
            the label as given indexes `pop_in_race_spmu`.
        monthly_ubi: UBI amount per person per month.

    Returns:
        A pd.Series with `poverty_rate` and `poverty_gap_per_capita`.
    """
    # Total cost of the UBI and the flat tax rate that pays for it.
    cost = monthly_ubi * total_population * 12
    tax_rate = cost / total_agi

    # New resources per SPM unit: old resources, less the tax, plus the UBI.
    unit_tax = tax_rate * spm.spm_agi_pos
    unit_ubi = 12 * monthly_ubi * spm.spm_numper
    spm["new_spm_resources"] = spm.spm_resources - unit_tax + unit_ubi

    # Attach each unit's new resources to its members and flag who is poor.
    people = person.merge(spm[["spm_id", "new_spm_resources"]], on="spm_id")
    people["new_poor"] = people.new_spm_resources < people.spm_povthreshold

    # Poverty rate among people of the requested race.
    race_key = race.lower()
    poverty_rate = mdf.weighted_mean(
        people[people[race_key]], "new_poor", "weight"
    )

    # Poverty gap over SPM units with a positive value in the race column,
    # expressed per capita.
    race_units = spm[spm["spm_" + race_key] > 0]
    poverty_gap = pov_gap(
        race_units, "new_spm_resources", "spm_povthreshold", "spm_weight"
    )
    poverty_gap_per_capita = poverty_gap / pop_in_race_spmu[race]

    return pd.Series(
        {
            "poverty_rate": poverty_rate,
            "poverty_gap_per_capita": poverty_gap_per_capita,
        }
    )
def pov_rate(column):
    """Return the weighted `original_poor` rate, in percent, for a subgroup.

    Args:
        column: Name of a column of `target_persons` used as a row mask
            (presumably boolean); only the selected rows are included.

    Returns:
        The `asecwt`-weighted mean of the `original_poor` column over the
        selected rows, multiplied by 100.
    """
    subgroup = target_persons[target_persons[column]]
    share_poor = mdf.weighted_mean(subgroup, "original_poor", "asecwt")
    return share_poor * 100
def test_weighted_mean():
    """Exercise mdf.weighted_mean without weights, with weights, and grouped."""
    # Unweighted.
    unweighted = mdf.weighted_mean(df, "x")
    assert unweighted == 8 / 3

    # Weighted, single column.
    weighted = mdf.weighted_mean(df, "x", "w")
    assert weighted == 11 / 6

    # Weighted, several columns at once.
    multi = mdf.weighted_mean(df, ["x", "y"], "w")
    assert multi.tolist() == [11 / 6, -3 / 6]

    # Grouped: only checks that the calls run; results are not asserted.
    for cols in ("x", ["x", "y"]):
        mdf.weighted_mean(dfg, cols, "w", "g")
def pov(data, group):
    """Return the weighted mean of `poverty_flag` per group as a DataFrame.

    Args:
        data: Frame with `poverty_flag` and `asecwt` columns.
        group: Passed to `mdf.weighted_mean` as its `groupby` argument.

    Returns:
        A pd.DataFrame wrapping the grouped weighted means.
    """
    rates = mdf.weighted_mean(data, "poverty_flag", "asecwt", groupby=group)
    return pd.DataFrame(rates)
# Calculate totals at both person and SPM unit levels so we can compare and # calculate poverty gaps. person_totals = mdf.weighted_sum(person, ["fica_fedtax_ac", "person"], "marsupwt") spmu_totals = mdf.weighted_sum(spmu, ["spmu_fica_fedtax_ac", "spmu_total_people"], "spm_weight") spmu_totals.index = person_totals.index totals = pd.concat([person_totals, spmu_totals], axis=1).T totals.index = ["person", "spmu"] # Calculate status quo person["poor"] = person.spm_resources < person.spm_povthreshold initial_poverty_rate = mdf.weighted_mean(person, "poor", "marsupwt") spmu["initial_poverty_gap"] = np.maximum( spmu.spm_povthreshold - spmu.spm_resources, 0) initial_poverty_gap = (spmu.initial_poverty_gap * spmu.spm_weight).sum() person["spm_resources_pp"] = person.spm_resources / person.spmu_total_people initial_gini = mdf.gini(person, "spm_resources_pp", w="marsupwt") def chg(new, base): return (100 * (new - base) / base).round(1) def tax(flat_tax, total_type="person"): """Calculate all metrics given a flat tax.
# NOTE(review): this excerpt starts mid-script. `temp` and `cps2` are defined
# above the visible code, and the first two statements may belong to an
# enclosing loop that is not shown — confirm the nesting against the full file.
# Presumably adds the e00200_* quantile columns that are dropped below.
mdf.add_weighted_quantiles(temp, 'e00200', 's006')
cps2 = pd.concat([cps2, temp], ignore_index=True)

# Bucket records by their e00200 percentile. right=False makes each bin
# closed on the left and open on the right.
cps2['income_bin'] = pd.cut(cps2['e00200_percentile_exact'],
                            [0, 25, 50, 70, 90, 99, 100],
                            right=False)
# The quantile helper columns are no longer needed once income_bin exists.
cps2 = cps2.drop([
    'e00200_percentile_exact', 'e00200_percentile', 'e00200_2percentile',
    'e00200_ventile', 'e00200_decile', 'e00200_quintile', 'e00200_quartile'
], axis=1)

# Weighted mean of nu18, n1864 and n65 (weights s006) for every
# (income_bin, age_head) cell.
cps2 = cps2.groupby([
    'income_bin', 'age_head'
]).apply(lambda x: pd.Series({
    'nu18': mdf.weighted_mean(x, 'nu18', 's006'),
    'n1864': mdf.weighted_mean(x, 'n1864', 's006'),
    'n65': mdf.weighted_mean(x, 'n65', 's006'),
}))
cps2.reset_index(inplace=True)
# NOTE(review): the result of this filter is discarded, so cps2 is NOT
# restricted to ages 20-80 here. If the restriction is intended, it needs to
# be assigned back — confirm intent before changing.
cps2[cps2['age_head'].between(20, 80)]

# Smooth nu18 over age_head within each income bin, plot the smoothed curve,
# and collect the smoothed values in loop order.
smoothed18 = []
for group in cps2['income_bin'].unique():
    temp = cps2[cps2['income_bin'] == group]
    x = temp["age_head"]
    y = temp["nu18"]
    # plt.plot(x, y, label=group)
    # lowess returns a two-column array; column 0 is plotted as x, column 1 as y.
    z18 = lowess(y, x, frac=0.4)
    plt.plot(z18[:, 0], z18[:, 1], label=group)
    smoothed18.extend(z18[:, 1])
def test_weighted_mean():
    """Check the weighted mean of column x of `df` under weights w."""
    # TODO: Add None default to w, then enable the unweighted check:
    # assert mdf.weighted_mean(df, 'x') == 8 / 3
    expected = 11 / 6
    actual = mdf.weighted_mean(df, "x", "w")
    assert actual == expected
cdfs = pd.concat(cdf_list)

# For every UBI amount keep the single row with the largest d_stat_cand.
cdfs_sorted = cdfs.sort_values("d_stat_cand", ascending=False)
cdfs_max = cdfs_sorted.groupby("ubi_mo").head(1)


def _summarize_ubi_group(x):
    """Summarize net worth by race for the rows of one UBI amount."""
    black = x[x.race2 == "Black"]
    white = x[x.race2 == "White"]
    black_above_50k = black[black.networth_pa_new >= 50000]
    white_above_50k = white[white.networth_pa_new >= 50000]
    return pd.Series({
        "black_median_networth_pa":
            mdf.weighted_median(black, "networth_pa_new", "wgt"),
        "white_median_networth_pa":
            mdf.weighted_median(white, "networth_pa_new", "wgt"),
        "black_mean_networth_pa":
            mdf.weighted_mean(black, "networth_pa_new", "wgt"),
        "white_mean_networth_pa":
            mdf.weighted_mean(white, "networth_pa_new", "wgt"),
        "black_share_above_50k": black_above_50k.wgt.sum() / totals.black_hhs,
        "white_share_above_50k": white_above_50k.wgt.sum() / totals.white_hhs,
    })


# One row per UBI amount with, for Black and White separately:
# - weighted median and mean net worth
# - weighted share at or above $50k, relative to totals.black_hhs /
#   totals.white_hhs
ubi_summary = simulated.groupby("ubi_mo").apply(_summarize_ubi_group)
ubi_summary = ubi_summary.reset_index()

# White-to-Black ratios of the mean and of the median.
ubi_summary["white_mean_nw_as_pct_of_mean_black"] = (
    ubi_summary.white_mean_networth_pa / ubi_summary.black_mean_networth_pa
)
ubi_summary["white_median_nw_as_pct_of_median_black"] = (
    ubi_summary.white_median_networth_pa
    / ubi_summary.black_median_networth_pa
)
def pov(groupby, data=person_sim):
    """Return the weighted mean of `poverty_flag` for each group.

    Args:
        groupby: Column name(s) passed to `DataFrame.groupby`.
        data: Frame with `poverty_flag` and `asecwt` columns. Defaults to
            `person_sim` as bound when this function was defined.

    Returns:
        The per-group weighted means with the group keys moved back into
        columns by `reset_index`.
    """
    grouped = data.groupby(groupby)
    rates = grouped.apply(
        lambda rows: mdf.weighted_mean(rows, "poverty_flag", "asecwt")
    )
    return rates.reset_index()