def recalculate(df):
    """Recalculates fields in the DataFrame for after components have changed.

    Two kinds of derived columns are refreshed:

    * ``tpc_eci``, if present, is recomputed with ``mdf.tpc_eci``.
    * Every weighted metric column ``<name>_m`` whose base column ``<name>``
      exists is recomputed with ``mdf.add_weighted_metrics``.

    :param df: DataFrame for use in microdf.
    :returns: Nothing. Updates the DataFrame in place.

    """
    suffix = "_m"
    # Weight column named in calc_df's docstring ("s006 is always included").
    # NOTE(review): assumed to be add_weighted_metrics' default weight;
    # confirm against its signature.
    weight = "s006"

    cols = df.columns
    # Recalculate TPC's Expanded Cash Income measure.
    if "tpc_eci" in cols:
        df["tpc_eci"] = mdf.tpc_eci(df)

    # Recalculate weighted metrics (anything ending in _m).
    # add_weighted_metrics takes the *base* names and appends the suffix
    # itself, so the suffix must be stripped first; passing "x_m" would
    # create "x_m_m" and leave "x_m" stale.
    existing = set(cols)
    metric_vars = []
    for col in cols:
        if not (isinstance(col, str) and col.endswith(suffix)):
            continue
        base = col[: -len(suffix)]
        # Skip the weight's own scaled column and any "_m" column that is
        # not derived from a column in this DataFrame.
        if base != weight and base in existing:
            metric_vars.append(base)
    # Called unconditionally, as in the original, even with no metrics found.
    mdf.add_weighted_metrics(df, metric_vars)
def recalculate(df):
    """
    Recalculates fields in the DataFrame for after components have changed.

    Two kinds of derived columns are refreshed: `tpc_eci` (if present), via
    `mdf.tpc_eci`, and every weighted metric column `<name>_m` whose base
    column `<name>` exists, via `mdf.add_weighted_metrics`.

    Args:
        df: DataFrame for use in microdf.

    Returns:
        Nothing. Updates the DataFrame in place.
    """
    suffix = '_m'
    # Weight column named in calc_df's docstring ("s006 is always included").
    # NOTE(review): assumed to be add_weighted_metrics' default weight;
    # confirm against its signature.
    weight = 's006'

    cols = df.columns
    # Recalculate TPC's Expanded Cash Income measure (the only aggregate
    # income measure this function recomputes).
    if 'tpc_eci' in cols:
        df['tpc_eci'] = mdf.tpc_eci(df)

    # Recalculate weighted metrics (anything ending in _m).
    # add_weighted_metrics takes the *base* names and appends the suffix
    # itself, so the suffix must be stripped first; passing 'x_m' would
    # create 'x_m_m' and leave 'x_m' stale.
    existing = set(cols)
    metric_vars = []
    for col in cols:
        if not (isinstance(col, str) and col.endswith(suffix)):
            continue
        base = col[:-len(suffix)]
        # Skip the weight's own scaled column and any '_m' column that is
        # not derived from a column in this DataFrame.
        if base != weight and base in existing:
            metric_vars.append(base)
    # Called unconditionally, as in the original, even with no metrics found.
    mdf.add_weighted_metrics(df, metric_vars)
'31': 'Nebraska', '32':'Nevada', '33': 'New Hampshire', '34': 'New Jersey', '35': 'New Mexico', '36':'New York', '37':'North Carolina', '38':'North Dakota', '39': 'Ohio', '40':'Oklahoma', '41': 'Oregon', '42':'Pennslyvania', '44':'Rhode Island','45':'South Carolina', '46':'South Dakota', '47': 'Tennessee', '48':'Texas','49':'Utah','50':'Vermont', '51':'Virginia', '53':'Washington', '54':'West Virginia', '55':'Wisconsin', '56':'Wyoming'},inplace=True) person['child'] = person.age < 18 person['adult'] = person.age >= 18 ages = person.groupby(['spmfamunit', 'year'])[['child','adult']].sum() ages.columns = ['total_children', 'total_adults'] person = person.merge(ages,left_on=['spmfamunit', 'year'], right_index=True) person['total_people'] = person.total_children + person.total_adults mdf.add_weighted_metrics(person, ['child', 'adult'], 'weight') child_pop = person.groupby('state')[['child']].sum()/3 def ca_pov(state, age_group, ca_monthly=0): target_persons = person[person.state==state].copy(deep=True) total_population = target_persons.weight.sum() adult_population = (target_persons.weight * target_persons.adult).sum() child_population = (target_persons.weight * target_persons.child).sum() if age_group == 'child': target_persons = target_persons[target_persons.child] if age_group == 'adult': target_persons = target_persons[target_persons.adult] spending = child_pop/(ca_monthly * 12)
def calc_df(
    records=None,
    policy=None,
    year=2020,
    reform=None,
    group_vars=None,
    metric_vars=None,
    group_n65=False,
):
    """Creates a pandas DataFrame for given Tax-Calculator data.

    s006 is always included, and RECID is used as an index.

    :param records: An optional Records object. If not provided, uses CPS
        records. (Default value = None)
    :param policy: An optional Policy object. If not provided, uses default
        Policy.
    :param year: An optional year to advance to. If not provided, defaults to
        2020.
    :param reform: An optional reform to implement for the Policy object.
        (Default value = None)
    :param group_vars: An optional list of column names to include in the
        DataFrame. May be left as None even when group_n65 is True.
        (Default value = None)
    :param metric_vars: An optional list of column names to include and
        calculate weighted sums of (in millions named as *_m) in the
        DataFrame. (Default value = None)
    :param group_n65: Whether to calculate and group by n65. Defaults to
        False. The age columns needed to compute n65 are fetched
        automatically and removed afterwards unless the caller also
        requested them via group_vars or metric_vars.
    :returns: A pandas DataFrame. market_income is also always calculated,
        as are bens and tax.

    """
    tc = import_optional_dependency("taxcalc")
    # Assign defaults.
    if records is None:
        records = tc.Records.cps_constructor()
    if policy is None:
        policy = tc.Policy()
    if reform is not None:
        # NOTE: applied to the Policy object itself, including one supplied
        # by the caller (presumably modifying it in place).
        policy.implement_reform(reform)
    # Calculate.
    calc = tc.Calculator(records=records, policy=policy, verbose=False)
    calc.advance_to_year(year)
    calc.calc_all()
    # Inputs required by n65(); only fetched when group_n65 is set.
    n65_inputs = ["age_head", "age_spouse", "elderly_dependents"]
    # Columns the caller explicitly asked for. listify is given a list that
    # may contain None/str/list entries, the same shape as the all_cols call
    # below, so a None default is handled.
    requested = mdf.listify([group_vars, metric_vars])
    # Get a deduplicated list of all columns.
    if group_n65:
        # Merge through listify rather than `group_vars + [...]`, which
        # raised TypeError when group_vars was None or a single string.
        group_vars = mdf.listify([group_vars, n65_inputs])
    # Include expanded_income and benefits to produce market_income.
    all_cols = mdf.listify(
        [
            "RECID",
            "s006",
            "expanded_income",
            "aftertax_income",
            mdf.BENS,
            group_vars,
            metric_vars,
        ]
    )
    df = calc.dataframe(all_cols)
    # Create core elements.
    df["market_income"] = mdf.market_income(df)
    df["bens"] = df[mdf.BENS].sum(axis=1)
    df["tax"] = df.expanded_income - df.aftertax_income
    if group_n65:
        df["n65"] = n65(df.age_head, df.age_spouse, df.elderly_dependents)
        # Drop only the helper columns the caller did not request, so that
        # requested group/metric columns survive (metric columns are still
        # needed by add_weighted_metrics below).
        helper_only = [col for col in n65_inputs if col not in requested]
        df.drop(helper_only, axis=1, inplace=True)
    # Add calculated columns for metrics.
    mdf.add_weighted_metrics(df, metric_vars)
    # Set RECID to int and set it as index before returning.
    df["RECID"] = df.RECID.map(int)
    return df.set_index("RECID")
# Notebook fragment: relies on `base` and `CG_REFORM` defined in earlier cells.
reform = mdf.calc_df(
    reform=CG_REFORM,
    group_vars=['MARS'],
    group_n65=True,
    metric_vars=['aftertax_income', 'XTOT'],
)
reform.columns

# ### Calculate senior UBI.
# Start with total revenue ($ billions).
base_aftertax_m = base['aftertax_income_m'].sum()
new_rev_m = base_aftertax_m - reform['aftertax_income_m'].sum()
new_rev_m / 1e3  # Weighted sums are in millions; / 1e3 shows billions.

# How many seniors are there?
mdf.add_weighted_metrics(reform, 'n65')
n65_total_m = reform['n65_m'].sum()
n65_total_m

# Divide.
# Reuses the senior total computed above (same value as summing n65_m again).
senior_ubi = new_rev_m / n65_total_m
senior_ubi

# ### Add senior UBI to `aftertax_income` and recalculate
reform['ubi'] = senior_ubi * reform['n65']
reform['aftertax_income'] = reform['aftertax_income'] + reform['ubi']
# Refresh aftertax_income_m now that aftertax_income has changed.
mdf.add_weighted_metrics(reform, 'aftertax_income')
import microdf as mdf

# NOTE(review): `tc` is not imported in this fragment; presumably taxcalc is
# imported as `tc` in an earlier cell. Confirm.
tc.__version__

# ## Load data
# Start with a standard `DataFrame`, then add a UBI manually in a reform copy.
base = mdf.calc_df(
    group_vars=['expanded_income', 'MARS', 'XTOT'],
    metric_vars='aftertax_income',
)
reform = base.copy(deep=True)
UBI_PP = 10000
reform['ubi'] = reform['XTOT'] * UBI_PP
reform['aftertax_income'] = reform['aftertax_income'] + reform['ubi']
# Refresh aftertax_income_m now that aftertax_income has changed.
mdf.add_weighted_metrics(reform, 'aftertax_income')

# ## `agg`
# ### Change in aftertax income by marital status.
mdf.agg(base, reform, 'MARS', 'aftertax_income')

# ### Also sum baseline `expanded_income`
mdf.agg(base, reform, 'MARS', 'aftertax_income', 'expanded_income')

# ### Also sum UBI amount
mdf.add_weighted_metrics(reform, 'ubi')  # Creates ubi_m = ubi * s006 / 1e6.
def calc_df(records=None, policy=None, year=2019, reform=None,
            group_vars=None, metric_vars=None, group_n65=False):
    """Creates a pandas DataFrame for given Tax-Calculator data.

    s006 is always included, and RECID is used as an index.

    Args:
        records: An optional Records object. If not provided, uses CPS
            records.
        policy: An optional Policy object. If not provided, uses default
            Policy.
        year: An optional year to advance to. If not provided, defaults to
            2019.
        reform: An optional reform to implement for the Policy object.
        group_vars: An optional list of column names to include in the
            DataFrame. May be left as None even when group_n65 is True.
        metric_vars: An optional list of column names to include and
            calculate weighted sums of (in millions named as *_m) in the
            DataFrame.
        group_n65: Whether to calculate and group by n65. Defaults to False.
            The age columns needed to compute n65 are fetched automatically
            and removed afterwards unless the caller also requested them via
            group_vars or metric_vars.

    Returns:
        A pandas DataFrame. market_income is also always calculated, as are
        bens and tax.
    """
    tc = import_optional_dependency("taxcalc")
    # Assign defaults.
    if records is None:
        records = tc.Records.cps_constructor()
    if policy is None:
        policy = tc.Policy()
    if reform is not None:
        # NOTE: applied to the Policy object itself, including one supplied
        # by the caller (presumably modifying it in place).
        policy.implement_reform(reform)
    # Calculate.
    calc = tc.Calculator(records=records, policy=policy, verbose=False)
    calc.advance_to_year(year)
    calc.calc_all()
    # TODO: Make n65, ECI, etc. part of the list of columns you can request.
    # Inputs required by n65(); only fetched when group_n65 is set.
    n65_inputs = ['age_head', 'age_spouse', 'elderly_dependents']
    # Columns the caller explicitly asked for. listify is given a list that
    # may contain None/str/list entries, the same shape as the all_cols call
    # below, so a None default is handled.
    requested = mdf.listify([group_vars, metric_vars])
    # Get a deduplicated list of all columns.
    if group_n65:
        # Merge through listify rather than `group_vars + [...]`, which
        # raised TypeError when group_vars was None or a single string.
        group_vars = mdf.listify([group_vars, n65_inputs])
    # Include expanded_income and benefits to produce market_income.
    all_cols = mdf.listify(
        ['RECID', 's006', 'expanded_income', 'aftertax_income', mdf.BENS,
         group_vars, metric_vars])
    df = calc.dataframe(all_cols)
    # Create core elements.
    df['market_income'] = mdf.market_income(df)
    df['bens'] = df[mdf.BENS].sum(axis=1)
    df['tax'] = df.expanded_income - df.aftertax_income
    if group_n65:
        df['n65'] = n65(df.age_head, df.age_spouse, df.elderly_dependents)
        # Drop only the helper columns the caller did not request, so that
        # requested group/metric columns survive (metric columns are still
        # needed by add_weighted_metrics below).
        helper_only = [col for col in n65_inputs if col not in requested]
        df.drop(helper_only, axis=1, inplace=True)
    # Add calculated columns for metrics.
    mdf.add_weighted_metrics(df, metric_vars)
    # Set RECID to int and set it as index before returning.
    df['RECID'] = df.RECID.map(int)
    return df.set_index('RECID')