def child_benefits_data(start, end):
    """Collect the Kindergeld parameters for every year of a period.

    Parameters:
        start (Int): Defines the start of the simulated period
        end (Int): Defines the end of the simulated period (inclusive)

    Returns:
        pd.DataFrame: one row per year, one column per child rank
        ("First child" to "Fourth child").
    """
    column_labels = [
        "First child",
        "Second child",
        "Third child",
        "Fourth child",
    ]

    # Gather one column per year first; the frame is flipped afterwards so
    # that the years end up in the index.
    per_year = pd.DataFrame()
    for year in range(start, end + 1):
        params_for_year, _ = set_up_policy_environment(year)
        per_year[year] = params_for_year["kindergeld"]["kindergeld"].values()

    by_year = per_year.transpose()
    # Labels are assigned by position, i.e. this relies on the order of the
    # entries in the "kindergeld" parameter dict.
    by_year.columns = column_labels
    return by_year
def social_assistance_data(start, end):
    """Return the social assistance standard rates (Regelsätze) per year.

    For years up to and including 2010 the rates are derived by multiplying
    the ``regelsatz`` parameter with the shares in ``anteil_regelsatz``
    (with the share of a single adult set to 1). For later years the values
    of the ``regelsatz`` parameter are used directly and labelled by
    position.

    Parameters:
        start (Int): Defines the start of the simulated period
        end (Int): Defines the end of the simulated period (inclusive)

    Returns:
        soz_ass_out (pd.DataFrame): one row per year with the columns
        "ein_erwachsener", "zwei_erwachsene", "weitere_erwachsene",
        "kinder_14_24", "kinder_7_13" and "kinder_0_6".
    """
    # Single source of truth for both the positional labelling of the
    # post-2010 values and the column order of the returned frame.
    columns = [
        "ein_erwachsener",
        "zwei_erwachsene",
        "weitere_erwachsene",
        "kinder_14_24",
        "kinder_7_13",
        "kinder_0_6",
    ]

    soz_ass_dict = {}
    for year in range(start, end + 1):
        policy_params, _ = set_up_policy_environment(year)
        alg2_params = policy_params["arbeitsl_geld_2"]

        if year <= 2010:
            # Work on a copy so that the parameter dict handed out by
            # set_up_policy_environment is not modified in place.
            anteil_regelsatz = dict(alg2_params["anteil_regelsatz"])
            anteil_regelsatz["ein_erwachsener"] = 1
            regelsaetze = (
                np.array(list(anteil_regelsatz.values()))
                * alg2_params["regelsatz"]
            )
            soz_ass_dict[year] = dict(zip(anteil_regelsatz.keys(), regelsaetze))
        else:
            soz_ass_dict[year] = dict(
                zip(columns, alg2_params["regelsatz"].values())
            )

    soz_ass_df = pd.DataFrame.from_dict(soz_ass_dict, orient="index")
    soz_ass_out = soz_ass_df[columns]
    return soz_ass_out
def test_pension(input_data, year):
    """Compare the computed ``rente_anspr_m`` with the expected test data."""
    target = "rente_anspr_m"

    # Restrict the test data to the requested year
    expected_rows = input_data[input_data["jahr"] == year]
    inputs = expected_rows[INPUT_COLS].copy()

    params, functions = set_up_policy_environment(date=f"{year}-07-01")
    computed = compute_taxes_and_transfers(
        data=inputs,
        params=params,
        functions=functions,
        targets=target,
    )

    # The computed values are rounded to two decimals before the comparison
    assert_series_equal(computed[target].round(2), expected_rows[target])
def test_update_earning_points(input_data, year):
    """Compare the computed ``entgeltpunkte_update`` with column ``EP_end``."""
    target = "entgeltpunkte_update"

    # Restrict the test data to the requested year
    expected_rows = input_data[input_data["jahr"] == year]
    inputs = expected_rows[INPUT_COLS].copy()

    params, functions = set_up_policy_environment(date=f"{year}-07-01")
    computed = compute_taxes_and_transfers(
        data=inputs,
        params=params,
        functions=functions,
        targets=target,
    )

    # Computed and expected series carry different names, hence
    # check_names=False
    assert_series_equal(
        computed[target],
        expected_rows["EP_end"],
        check_names=False,
    )
def test_synthetic():
    """Test creation of synthetic data."""
    # run with defaults
    df = create_synthetic_data()

    # rent must be positive
    assert df["kaltmiete_m_hh"].min() > 0
    # heating cost must be positive
    assert df["heizkosten_m_hh"].min() > 0
    # no NaN values
    assert df.notna().all().all()

    # correct dimensions for every household type
    # NOTE: the comparison has to sit outside of len(); with it inside the
    # parentheses the assertion only checks that the frame is non-empty.
    expected_household_sizes = {
        "couple_0_children": 2,
        "single_2_children": 3,
        "couple_2_children": 4,
    }
    for hh_typ, n_persons in expected_household_sizes.items():
        assert len(df[df["hh_typ"] == hh_typ]) == n_persons

    # unique personal id?
    assert df["p_id"].is_unique

    doppelverdiener = create_synthetic_data(
        hh_typen=["couple"],
        n_children=[0],
        double_earner=True,
        bruttolohn_m=2000,
    )
    assert (doppelverdiener["bruttolohn_m"] > 0).all()

    # test heterogeneity
    incrange = create_synthetic_data(
        hh_typen=["couple"],
        n_children=0,
        heterogeneous_vars={
            "bruttolohn_m": list(np.arange(0, 6000, 1000)),
            "vermögen_hh": [10_000, 500_000, 1_000_000],
        },
    )
    # every household id must belong to exactly two persons
    assert (incrange.groupby("hh_id").size() == 2).all()
    assert incrange.notna().all().all()

    # finally, run through gettsim
    policy_params, policy_functions = set_up_policy_environment(2020)
    results = compute_taxes_and_transfers(df, policy_params, policy_functions)
    assert len(results) == len(df)
def social_security_data(start, end):
    """Collect the social security contribution parameters for a year range.

    Parameters:
        start (Int): Defines the start of the simulated period
        end (Int): Defines the end of the simulated period (inclusive)

    Returns:
        pd.DataFrame: one row per year with the columns
        "unemployment insurance", "pension insurance",
        "health insurance employer", "health insurance employee" and
        "care insurance".
    """
    contributions_by_year = {}
    for year in range(start, end + 1):
        params_for_year, _ = set_up_policy_environment(year)
        contributions_by_year[year] = params_for_year["soz_vers_beitr"][
            "soz_vers_beitr"
        ]

    # Years become the index, parameter names the columns
    by_year = pd.DataFrame(data=contributions_by_year).transpose()

    # The health and care insurance entries are nested dicts; spread them
    # over separate columns
    health_columns = by_year["ges_krankenv"].apply(pd.Series)
    care_columns = by_year["pflegev"].apply(pd.Series)
    flat_columns = by_year[["arbeitsl_v", "rentenv"]]

    combined = pd.concat([flat_columns, health_columns, care_columns], axis=1)
    # Labels are assigned by position, i.e. this relies on the order of the
    # nested entries.
    combined.columns = [
        "unemployment insurance",
        "pension insurance",
        "health insurance employer",
        "health insurance employee",
        "care insurance",
        "additional care insurance no child",
    ]

    # We don't need the top-up for childless persons
    return combined.drop(columns=["additional care insurance no child"])
def tax_rate_data(start, end):
    """Compute average and marginal income tax rates on an income grid.

    For every year of the period the income tax (``st_tarif``) and the
    solidarity surcharge on top of it are evaluated on a grid of 601 incomes
    between 0 and 300000.

    Parameters:
        start (Int): Defines the start of the simulated period
        end (Int): Defines the end of the simulated period (inclusive)

    Returns:
        dict: maps each year to a dict with the keys "tax_rate",
        "overall_tax_rate", "marginal_rate", "overall_marginal_rate" and
        "income". The "overall" entries include the solidarity surcharge.
    """
    income_grid = pd.Series(data=np.linspace(0, 300000, 601))

    rates_by_year = {}
    for year in range(start, end + 1):
        params_for_year, _ = set_up_policy_environment(year)
        income_tax_params = params_for_year["eink_st"]
        soli_params = params_for_year["soli_st"]["soli_st"]

        income_tax = st_tarif(income_grid, income_tax_params)
        # The solidarity surcharge is a piecewise polynomial of the income tax
        soli = piecewise_polynomial(
            income_tax,
            thresholds=soli_params["thresholds"],
            rates=soli_params["rates"],
            intercepts_at_lower_thresholds=soli_params[
                "intercepts_at_lower_thresholds"
            ],
        )

        # Marginal rates are numerical derivatives along the income grid
        marginal = np.gradient(income_tax, income_grid)
        marginal_with_soli = np.gradient(income_tax + soli, income_grid)

        # NOTE: the first grid point is an income of 0, so the two average
        # rates involve a division by zero at that position.
        rates_by_year[year] = {
            "tax_rate": (income_tax / income_grid),
            "overall_tax_rate": ((soli + income_tax) / income_grid),
            "marginal_rate": pd.Series(marginal),
            "overall_marginal_rate": pd.Series(marginal_with_soli),
            "income": income_grid,
        }

    return rates_by_year
def heatmap_data():
    """Prepare the data for the heatmap of tax burden changes.

    For every combination of labor and capital income on a 250 x 250 grid the
    function computes, with the 2020 parameters, the income tax on the sum of
    both incomes minus the sum of the income tax on labor income alone and
    the flat tax on capital income above the saver's allowance.

    Returns:
        dict: "heatmap_source" holds a long-format DataFrame with the columns
        "Capital income", "Labor income" and "Change to tax burden";
        "household_dict" holds labels and incomes used to place household
        groups in the heatmap.
    """
    labor_grid = pd.Series(data=np.linspace(0, 310000, 250))
    capital_grid = pd.Series(data=np.linspace(0, 100000, 250))

    # Get relevant policy params from GETTSIM
    params_2020, _ = set_up_policy_environment(2020)
    tariff_params = params_2020["eink_st"]
    saver_allowance = params_2020["eink_st_abzuege"]["sparerpauschbetrag"]
    capital_tax_rate = params_2020["abgelt_st"]["abgelt_st_satz"]

    # Taxable capital income cannot be negative
    taxable_capital = capital_grid - saver_allowance
    taxable_capital[taxable_capital < 0] = 0
    capital_tax = taxable_capital * capital_tax_rate

    # One column per labor income, one row per capital income
    heatmap_df = pd.DataFrame(columns=labor_grid)
    n_points = len(labor_grid)
    for position, labor_value in enumerate(labor_grid):
        column_label = heatmap_df.columns[position]
        labor_only = pd.Series(data=[labor_value] * n_points)
        labor_plus_capital = labor_only + capital_grid
        heatmap_df[column_label] = (
            st_tarif(labor_plus_capital, tariff_params)
        ) - (st_tarif(labor_only, tariff_params) + capital_tax)
    heatmap_df.index = capital_grid

    # Reshape to long format: one row per income combination
    stacked = heatmap_df.stack()
    heatmap_source = pd.DataFrame(
        stacked, columns=["Change to tax burden"]
    ).reset_index()
    heatmap_source.columns = [
        "Capital income",
        "Labor income",
        "Change to tax burden",
    ]

    # Data to show where average household per decile is located in heatmap;
    # only the top groups carry a label
    deciles = [""] * 9 + ["P90", "P95", "P99", "P100"]

    # from Bach & Buslei 2017 table 3-2
    capital_income_tax = pd.Series(
        data=[0, 0, 0, 0, 0, 4, 15, 36, 52, 84, 167, 559, 13873]
    )
    # Back out the capital income from the tax paid; 0.26375 is presumably
    # the 25% flat tax plus solidarity surcharge -- TODO confirm
    capital_income = capital_income_tax / 0.26375

    # from Bach & Buslei 2017 table 3-2 "Äquivalenzgewichtetes Einkommen"
    # (equivalence-weighted income)
    total_income = pd.Series(
        data=[
            0,
            -868,
            4569,
            9698,
            14050,
            18760,
            23846,
            29577,
            36769,
            47676,
            63486,
            95899,
            350423,
        ]
    )
    labor_income = total_income - capital_income

    household_dict = {
        "deciles": deciles,
        "capital_income": capital_income,
        "labor_income": labor_income,
    }

    return {
        "heatmap_source": heatmap_source,
        "household_dict": household_dict,
    }
def individiual_view_data():
    """Prepare the stacked-bar data for the individual view.

    Labor and capital income both run over the same grid from 0 to 250000 in
    steps of 500. With the 2020 parameters the function computes taxable
    incomes, taxes and net incomes once with labor and capital income taxed
    separately and once with the income tax applied to their sum.

    Returns:
        dict: for every quantity key ("CI", "LI", ...) a list with one entry
        per bar in "x_range"; bars a quantity does not belong to hold a list
        of zeros. "LI_list", "CI_list", "Total_list" and "Final_order" list
        quantity keys.
    """
    labor_income = pd.Series(data=range(0, 250001, 500))
    capital_income = pd.Series(data=range(0, 250001, 500))
    n_points = len(labor_income)

    # Deductions are assumed to be 20% of the respective gross income
    labor_deductions = 0.2 * labor_income
    total_income = labor_income + capital_income
    total_deductions = 0.2 * total_income
    taxable_income = total_income - total_deductions
    taxable_labor_income = labor_income - labor_deductions

    # Get relevant policy params from GETTSIM
    params_2020, _ = set_up_policy_environment(2020)
    tariff_params = params_2020["eink_st"]

    # Average income tax rates, rounded to two decimals. fillna(0) replaces
    # NaN values, e.g. from 0 / 0 at the zero-income grid point.
    labor_tax_rate = (
        (st_tarif(taxable_labor_income, tariff_params) / taxable_labor_income)
        .fillna(0)
        .round(2)
    )
    total_tax_rate = (
        (st_tarif(taxable_income, tariff_params) / taxable_income)
        .fillna(0)
        .round(2)
    )

    # Capital income: deduct the saver's allowance, floor at zero
    capital_deductions = pd.Series(
        data=[params_2020["eink_st_abzuege"]["sparerpauschbetrag"]] * n_points
    )
    capital_tax_rate = params_2020["abgelt_st"]["abgelt_st_satz"]
    taxable_capital_income = capital_income - capital_deductions
    taxable_capital_income[taxable_capital_income < 0] = 0

    # Taxes (computed from the rounded average rates)
    total_tax = (taxable_income * total_tax_rate).round(2)
    labor_tax = (taxable_labor_income * labor_tax_rate).round(2)
    capital_tax = taxable_capital_income * capital_tax_rate

    # Net incomes
    net_capital_income = taxable_capital_income - capital_tax
    net_labor_income = (taxable_labor_income - labor_tax).round(2)
    net_income = (taxable_income - total_tax).round(2)

    x_range = [
        "Gross income (S)",
        "Taxable income (S)",
        "Net income (S)",
        "Gross income (R)",
        "Taxable income (R)",
        "Net income (R)",
    ]

    # blank placeholder, shared by all bars a quantity does not belong to
    blank = [0] * n_points

    def bars(placements):
        """Return one entry per bar: the placed series or the placeholder."""
        return [placements.get(slot, blank) for slot in range(len(x_range))]

    data_full = {
        "x_range": x_range,
        "CI": bars({0: capital_income, 3: capital_income}),
        "LI": bars({0: labor_income, 3: labor_income}),
        "TI": bars({4: taxable_income}),
        "NI": bars({5: net_income}),
        "T": bars({5: total_tax}),
        "CD": bars({1: capital_deductions, 2: capital_deductions}),
        "LD": bars({1: labor_deductions}),
        "TCI": bars({1: taxable_capital_income}),
        "TLI": bars({1: taxable_labor_income}),
        "CT": bars({2: capital_tax}),
        "LT": bars({2: labor_tax}),
        "NCI": bars({2: net_capital_income}),
        "NLI": bars({2: net_labor_income}),
        "TD": bars({4: total_deductions}),
        "LI_list": ["LI", "TLI", "LT", "NLI", "LD"],
        "CI_list": ["CI", "CD", "TCI", "CT", "NCI"],
        "Total_list": ["TI", "NI", "T", "TD"],
        "Final_order": [
            "CI",
            "LI",
            "CD",
            "TCI",
            "TLI",
            "TI",
            "LD",
            "TD",
            "CT",
            "NCI",
            "NLI",
            "LT",
            "NI",
            "T",
        ],
    }
    return data_full
def prepare_wg_data(sel_year, hh_size):
    """Simulate wohngeld on a grid of income and rent combinations.

    Parameters:
        sel_year: Int
            The year for which the wohngeld will be simulated
        hh_size: Int
            The size of the household for which wohngeld will be simulated.
            Values between 1 and 13. More than 12 just adds a lump-sum on top

    Returns:
        pd.DataFrame: rows are indexed by rent (0 to 2000), columns by income
        (0 to 4000); the cells hold the result of ``wohngeld_basis``.
    """
    # Retrieve policy parameters for the selected year
    policy_params, _ = set_up_policy_environment(sel_year)
    params = policy_params["wohngeld"]

    # Range of relevant income and rent combinations for the simulation
    einkommen = pd.Series(data=np.linspace(0, 4000, 81))
    miete = pd.Series(data=np.linspace(0, 2000, 81))
    n_rows = len(miete)
    household_size = pd.Series(data=[hh_size] * len(einkommen))

    def constant(value):
        """Return a series that repeats ``value`` once per rent grid point."""
        return pd.Series([value] * n_rows)

    # Arguments shared by all versions of the rent correction
    shared_args = (
        household_size,
        pd.Series(range(n_rows)),
        miete,
        constant(1),
        wohngeld_min_miete(household_size, params),
        params,
    )

    # Miete needs to be corrected acc. to mietstufe and hh size; the function
    # to use depends on the year. The leading constant 3 is presumably the
    # Mietstufe -- TODO confirm against the GETTSIM signatures.
    if sel_year <= 2008:
        wohngeld_miete = wohngeld_miete_bis_2008(
            constant(3), constant(1980), *shared_args
        )
    elif 2009 <= sel_year <= 2020:
        wohngeld_miete = wohngeld_miete_ab_2009(constant(3), *shared_args)
    elif sel_year >= 2021:
        wohngeld_miete = wohngeld_miete_ab_2021(constant(3), *shared_args)

    # Create a dataframe for the simulated data
    wohngeld_df = pd.DataFrame(columns=einkommen)

    # TODO: think about household["Mietstufe"]

    # One column per income value, evaluated against the whole rent grid
    for position, income_value in enumerate(einkommen):
        column_label = wohngeld_df.columns[position]
        income = pd.Series(data=[income_value] * len(einkommen))
        wohngeld_df[column_label] = wohngeld_basis(
            haushaltsgröße=household_size,
            # Account for minimum income
            wohngeld_eink=np.maximum(income, params["min_eink"][hh_size]),
            wohngeld_miete=wohngeld_miete,
            wohngeld_params=params,
        )

    wohngeld_df.index = miete
    return wohngeld_df
def deduction_data(start, end):
    """Collect the income tax deduction parameters for every year of a period.

    The parameters of ``eink_st_abzuege`` are complemented by the
    grundfreibetrag: for years before 2002 it comes from a hand-entered
    table (covering 1975 to 2001), from 2002 on it is the second threshold of
    the income tax tariff.

    Parameters:
        start (Int): Defines the start of the simulated period
        end (Int): Defines the end of the simulated period (inclusive)

    Returns:
        pd.DataFrame: one row per year; the nested kinderfreibetrag entries
        are spread over separate columns.
    """
    # Hand-entered older grundfreibetrag values. They are divided by 1.95583,
    # presumably the Deutsche Mark to euro conversion rate -- TODO confirm.
    divisor = 1.95583
    hand_entered = {
        2001: 14093,
        2000: 13499,
        1999: 13067,
        1998: 12365,
        1997: 12095,
        1996: 12095,
        1995: 5616,
        1994: 5616,
        1993: 5616,
        1992: 5616,
        1991: 5616,
        1990: 5616,
        1989: 4752,
        1988: 4752,
        1987: 4536,
        1986: 4536,
        1985: 4212,
        1984: 4212,
        1983: 4212,
        1982: 4212,
        1981: 4212,
        1980: 3690,
        1979: 3690,
        1978: 3329,
        1977: 3029,
        1976: 3029,
        1975: 3029,
    }

    # Loop through years to get the policy parameters; one column per year
    eink_ab_df = pd.DataFrame()
    for year in range(start, end + 1):
        policy_params, _ = set_up_policy_environment(year)
        params = policy_params["eink_st_abzuege"]
        if year < 2002:
            params["grundfreibetrag"] = round(hand_entered[year] / divisor)
        else:
            params["grundfreibetrag"] = policy_params["eink_st"][
                "eink_st_tarif"
            ]["thresholds"][1]
        eink_ab_df[year] = params.values()

    # The row labels are taken from the parameter dict of the last year
    eink_ab_df.index = params.keys()
    by_year = eink_ab_df.transpose()

    # Adjust dictionary entries into columns for kinderfreibetrag
    remaining_columns = by_year.drop(["kinderfreibetrag", "datum"], axis=1)
    kinderfreibetrag_columns = by_year["kinderfreibetrag"].apply(pd.Series)
    deduction_df = pd.concat(
        [remaining_columns, kinderfreibetrag_columns], axis=1
    )

    return deduction_df.drop(["behinderten_pauschbetrag", 0], axis=1)