def decile_chart(baseline: Microsimulation, reformed: Microsimulation) -> dict:
    """Bar chart of the relative change in net income for each income decile.

    People are ranked into deciles by baseline equivalised household net
    income. Within each decile the summed household net income gain is divided
    by the summed baseline household net income.

    :param baseline: Baseline microsimulation.
    :type baseline: Microsimulation
    :param reformed: Reform microsimulation.
    :type reformed: Microsimulation
    :return: Decile chart as a JSON representation of a Plotly chart.
    :rtype: dict
    """
    baseline_income = baseline.calc("household_net_income", map_to="person")
    equivalised = baseline.calc("equiv_household_net_income", map_to="person")
    reform_income = reformed.calc("household_net_income", map_to="person")
    income_gain = reform_income - baseline_income

    # Deciles are defined on the baseline equivalised income distribution.
    decile = equivalised.decile_rank()
    total_gain = income_gain.groupby(decile).sum()
    total_income = baseline_income.groupby(decile).sum()
    relative_change = total_gain / total_income

    chart_data = pd.DataFrame(
        {"Decile": relative_change.index, "Change": relative_change.values}
    )
    fig = px.bar(chart_data, x="Decile", y="Change")
    fig = fig.update_layout(
        title="Change to net income by decile",
        xaxis_title="Equivalised disposable income decile",
        yaxis_title="Percentage change",
        yaxis_tickformat="%",
        showlegend=False,
        xaxis_tickvals=list(range(1, 11)),
    )
    fig = fig.update_traces(marker_color=charts.BLUE)
    charts.add_zero_line(fig)
    return charts.formatted_fig_json(fig)
def migrate_to_universal_credit(dataset: Dataset = FRS, year: int = 2022) -> Dict[str, ArrayLike]:
    """Move reported legacy benefit amounts onto reported Universal Credit.

    Args:
        dataset (type, optional): The dataset to use. Defaults to FRS.
        year (int, optional): The year to use. Defaults to 2022.

    Returns:
        Dict[str, ArrayLike]: Replacement values keyed by
            ``"<variable>/<year>"``: every legacy benefit's reported amount
            multiplied by zero, and reported Universal Credit increased by
            the sum of those legacy amounts.
    """
    from openfisca_uk import Microsimulation

    sim = Microsimulation(dataset=dataset, year=year)
    uc_key = f"universal_credit_reported/{year}"
    changes = {
        uc_key: sim.calc("universal_credit_reported", period=year).values
    }
    for benefit in LEGACY_BENEFITS:
        legacy_amount = sim.calc(f"{benefit}_reported", period=year).values
        # Zero the legacy benefit and credit the same amount to UC instead.
        changes[f"{benefit}_reported/{year}"] = legacy_amount * 0
        changes[uc_key] += legacy_amount
    return changes
def spending(baseline: Microsimulation, reformed: Microsimulation) -> float:
    """Budgetary impact of a reform, measured as the change in total net income.

    :param baseline: Baseline microsimulation.
    :type baseline: Microsimulation
    :param reformed: Reform microsimulation.
    :type reformed: Microsimulation
    :return: Sum of reform net income minus sum of baseline net income.
    :rtype: float
    """
    reform_total = reformed.calc("net_income").sum()
    baseline_total = baseline.calc("net_income").sum()
    return reform_total - baseline_total
def intra_decile_graph_data(baseline: Microsimulation, reformed: Microsimulation) -> pd.DataFrame:
    """Share of people in each relative-gain band, per decile and overall.

    The relative gain is the change in household net income divided by
    baseline household net income (undefined ratios are dropped). It is cut
    into five bands at +5%, +0.1%, -0.1% and -5%, each band being
    ``lower < gain <= upper``; the bands are labelled, in that order, with
    the entries of ``NAMES``.

    :param baseline: Baseline simulation.
    :type baseline: Microsimulation
    :param reformed: Reform simulation.
    :type reformed: Microsimulation
    :return: DataFrame with columns ``fraction``, ``decile`` ("1".."10" or
        "All") and ``outcome``, plus the ``index`` column produced by
        ``reset_index``.
    :rtype: pd.DataFrame
    """
    equiv_income = baseline.calc("equiv_household_net_income", map_to="person")
    decile = equiv_income.decile_rank()
    baseline_hh_net_income = baseline.calc("household_net_income", map_to="person")
    reformed_hh_net_income = reformed.calc("household_net_income", map_to="person")
    gain = reformed_hh_net_income - baseline_hh_net_income
    rel_gain = (gain / baseline_hh_net_income).dropna()

    def within_band(series, lower, upper):
        """Keep the entries of ``series`` whose relative gain is in (lower, upper]."""
        if lower is not None:
            series = series[rel_gain > lower]
        if upper is not None:
            series = series[rel_gain <= upper]
        return series

    decile_numbers = range(1, 11)
    bands = (None, 0.05, 1e-3, -1e-3, -0.05, None)
    frames = []
    for upper, lower, name in zip(bands[:-1], bands[1:], NAMES):
        # One row per decile for this outcome band.
        fractions = []
        for j in decile_numbers:
            in_decile = rel_gain[decile == j]
            in_band = within_band(in_decile, lower, upper)
            fractions.append(in_band.count() / in_decile.count())
        frames.append(
            pd.DataFrame(
                {
                    "fraction": fractions,
                    "decile": [str(j) for j in decile_numbers],
                    "outcome": name,
                }
            )
        )
        # Plus one population-wide row for the same band.
        overall = within_band(rel_gain, lower, upper)
        frames.append(
            pd.DataFrame(
                {
                    "fraction": [overall.count() / rel_gain.count()],
                    "decile": "All",
                    "outcome": name,
                }
            )
        )
    return pd.concat(frames).reset_index()
def test_speed():
    """Time the model's import, initialisation and first calculation.

    Prints a YAML mapping of stage name to elapsed seconds (rounded to two
    decimals and suffixed with "s"):

    * ``import_model`` - importing ``Microsimulation``.
    * ``init_model`` - constructing the default ``Microsimulation``.
    * ``run_time`` - calculating ``household_net_income``.
    * ``total_time`` - all three stages together.
    """
    start_time = time()
    # Imported here on purpose: the import itself is one of the timed stages.
    from openfisca_uk import Microsimulation

    import_time = time()
    sim = Microsimulation()
    init_time = time()
    sim.calc("household_net_income")
    calc_time = time()

    # Each stage is measured from the end of the previous one; only the total
    # is measured from the very start.
    output = dict(
        import_model=import_time - start_time,
        init_model=init_time - import_time,
        run_time=calc_time - init_time,
        total_time=calc_time - start_time,
    )
    report = {
        stage: f"{round(seconds, 2)}s" for stage, seconds in output.items()
    }
    print(yaml.dump(report))
def main(args):
    """Print headline statistics next to the previously saved summary.

    Loads ``docs/summary/summary.yaml``, recomputes each statistic from the
    EnhancedFRS for 2022, and prints one line per statistic: ``old -> new``
    when the formatted value differs from the saved one, otherwise just the
    value. When ``args.save`` is truthy the summary file is overwritten with
    the new results.

    Args:
        args: Parsed command-line arguments; only the ``save`` attribute is
            read.
    """
    summary_path = "docs/summary/summary.yaml"
    year = 2022

    with open(summary_path, "r") as f:
        # An empty YAML document loads as None; treat it as "nothing saved".
        previous_results = yaml.safe_load(f) or {}

    sim = Microsimulation(dataset=EnhancedFRS, year=year)
    results = {
        "Poverty rate (BHC)": percentage(
            sim.calc("in_poverty_bhc", map_to="person", period=year).mean()
        ),
        "Poverty rate (AHC)": percentage(
            sim.calc("in_poverty_ahc", map_to="person", period=year).mean()
        ),
        "Income Tax revenue": gbp(sim.calc("income_tax", period=year).sum()),
        "National Insurance (employee-side) revenue": gbp(
            sim.calc("national_insurance", period=year).sum()
        ),
        "Total income": gbp(sim.calc("total_income", period=year).sum()),
        "Benefit expenditure": gbp(sim.calc("benefits", period=year).sum()),
    }

    for key, value in results.items():
        previous_value = previous_results.get(key, "")
        if previous_value != value:
            print(f"{key}: {previous_value} -> {value}")
        else:
            print(f"{key}: {value}")

    if args.save:
        with open(summary_path, "w") as f:
            yaml.safe_dump(results, f)
def get_household_mtrs(
    reform: ReformType,
    variable: str,
    period: int = None,
    baseline: Microsimulation = None,
    **kwargs: dict,
) -> pd.Series:
    """Calculates household MTRs with respect to a given variable.

    Every adult's value of ``variable`` is raised by 1 in a fresh simulation
    of ``reform``. The household MTR is the share of the resulting household
    increase in ``variable`` that does not show up as extra household net
    income.

    Args:
        reform (ReformType): The reform to apply to the simulation.
        variable (str): The variable to increase.
        period (int): The period (year) to calculate the MTRs for.
        baseline (Microsimulation): Simulation to measure the change against.
            When omitted, one is built from ``reform`` and ``kwargs``.
        kwargs (dict): Additional arguments to pass to the simulation.

    Returns:
        pd.Series: The household MTRs, clipped to [0, 1]. Households whose
            ratio is undefined or infinite (e.g. no increase was applied)
            get 0.
    """
    # Compare against None explicitly rather than relying on the simulation
    # object's truthiness.
    if baseline is None:
        baseline = Microsimulation(reform, **kwargs)

    baseline_var = baseline.calc(variable, period)
    bonus = baseline.calc("is_adult", period) * 1  # Increase only adult values

    reformed = Microsimulation(reform, **kwargs)
    reformed.set_input(variable, period, baseline_var + bonus)

    household_bonus = reformed.calc(
        variable, map_to="household", period=period
    ) - baseline.calc(variable, map_to="household", period=period)
    household_net_change = reformed.calc(
        "household_net_income", period=period
    ) - baseline.calc("household_net_income", period=period)

    mtr = (household_bonus - household_net_change) / household_bonus
    # Division by a zero bonus yields inf/NaN; report those households as 0.
    mtr = mtr.replace([np.inf, -np.inf], np.nan).fillna(0).clip(0, 1)
    return mtr
def headline_metrics(baseline: Microsimulation, reformed: Microsimulation) -> dict:
    """Society-wide summary figures for a reform.

    :param baseline: Baseline simulation.
    :type baseline: Microsimulation
    :param reformed: Reform simulation.
    :type reformed: Microsimulation
    :return: Dictionary with ``net_cost`` (formatted with ``gbp``),
        ``net_cost_numeric``, ``poverty_change``, ``winner_share``,
        ``loser_share`` and ``gini_change``.
    :rtype: dict
    """
    new_income = reformed.calc("equiv_household_net_income", map_to="person")
    old_income = baseline.calc("equiv_household_net_income", map_to="person")
    gain = new_income - old_income

    reform_total = reformed.calc("net_income").sum()
    baseline_total = baseline.calc("net_income").sum()
    net_cost = reform_total - baseline_total

    baseline_poverty = baseline.calc("in_poverty_bhc", map_to="person").mean()
    reform_poverty = reformed.calc("in_poverty_bhc", map_to="person").mean()
    poverty_change = pct_change(baseline_poverty, reform_poverty)

    # A person counts as a winner/loser only if their equivalised household
    # income moves by more than 1 in either direction.
    winner_share = (gain > 1).mean()
    loser_share = (gain < -1).mean()
    gini_change = pct_change(old_income.gini(), new_income.gini())

    return {
        "net_cost": gbp(net_cost),
        "net_cost_numeric": net_cost,
        "poverty_change": float(poverty_change),
        "winner_share": float(winner_share),
        "loser_share": float(loser_share),
        "gini_change": float(gini_change),
    }
def get_data():
    """Generate key datasets for UBI reforms.

    Runs the default baseline and a ``ubi_reform`` built with all-zero
    arguments (presumably the funding tax changes with no UBI payment -
    confirm against ``ubi_reform``), both mapped to household level.

    Returns:
        DataFrame: Baseline DataFrame with the ``BASELINE_COLS`` variables
            except ``is_disabled_for_ubi``, plus ``household_weight``.
        DataFrame: Reform DataFrame with all ``BASELINE_COLS`` variables,
            plus ``household_weight``.
        float: Baseline total net income minus reform total net income.
    """

    def as_plain_frame(micro_df):
        """Copy into a plain DataFrame with the weights as a regular column."""
        plain = pd.DataFrame(micro_df)
        plain["household_weight"] = micro_df.weights
        return plain

    baseline = Microsimulation()
    baseline_columns = [
        var for var in BASELINE_COLS if var != "is_disabled_for_ubi"
    ]
    baseline_df = baseline.df(baseline_columns, map_to="household")

    zero_by_region = pd.Series([0] * 12, index=REGIONS)
    reform_no_ubi = ubi_reform(0, 0, 0, 0, zero_by_region)
    reform_no_ubi_sim = Microsimulation(reform_no_ubi)
    reform_base_df = reform_no_ubi_sim.df(BASELINE_COLS, map_to="household")

    baseline_net_income = baseline.calc("net_income", map_to="household").sum()
    reform_net_income = reform_no_ubi_sim.calc(
        "net_income", map_to="household"
    ).sum()
    budget = baseline_net_income - reform_net_income

    baseline_df_pd = as_plain_frame(baseline_df)
    reform_base_df_pd = as_plain_frame(reform_base_df)
    return baseline_df_pd, reform_base_df_pd, budget
def generate_baseline_variables(dataset: Dataset, year: int):
    """Save baseline values of variables to a H5 dataset.

    For every model variable named ``baseline_<x>``, the variable ``<x>`` is
    calculated for each year from ``year`` through 2025 and stored under
    ``baseline_<x>/<year>``. The dataset file is rewritten from scratch with
    its existing contents plus these new arrays.

    Args:
        dataset (Dataset): The dataset to read from and write back to.
        year (int): The year of the dataset to input the results in.
    """
    from openfisca_uk import Microsimulation

    prefix = "baseline_"
    years = list(range(year, 2026))
    baseline = Microsimulation(dataset=dataset)
    variable_metadata = baseline.simulation.tax_benefit_system.variables

    # The variables to store are the un-prefixed counterparts of every
    # "baseline_*" variable.
    variables = [
        variable_metadata[name[len(prefix):]]
        for name in variable_metadata.keys()
        if name.startswith(prefix)
    ]
    print(f"Found {len(variables)} variables to store baseline values for:")
    print("\n* " + "\n* ".join([variable.label for variable in variables]))

    # Read everything into memory first: the file is reopened in "w" mode
    # below, which discards its current contents.
    with dataset.load(year) as data:
        stored = {
            name: {
                time_period: data[name][time_period][...]
                for time_period in data[name].keys()
            }
            for name in data.keys()
        }

    for variable in variables:
        stored[f"{prefix}{variable.name}"] = {
            subyear: baseline.calc(variable.name, period=subyear).values
            for subyear in years
        }

    with h5py.File(dataset.file(year), "w") as f:
        for name, by_period in stored.items():
            for time_period, values in by_period.items():
                f[f"{name}/{time_period}"] = values
def ubi():
    """Compute the per-person UBI that the requested reform's revenue could fund.

    Request parameters are merged from the query string and the JSON body
    (JSON wins on conflicts). The revenue is baseline total net income minus
    reform total net income; it is divided by the baseline sum of ``people``
    and floored at zero.

    When ``USE_CACHE`` is set, a response previously stored under the
    request's identifier is returned as-is, and newly computed responses are
    stored.

    Returns:
        dict: ``{"UBI": <float>}``.
    """
    start_time = time()
    app.logger.info("UBI size request received")
    params = {**request.args, **(request.json or {})}
    request_id = "ubi-" + dict_to_string(params) + "-" + VERSION
    blob = bucket.blob(request_id + ".json")

    # Check the flag first so the existence lookup on the blob is skipped
    # entirely when caching is disabled.
    if USE_CACHE and blob.exists():
        app.logger.info("Returning cached response")
        return json.loads(blob.download_as_string())

    reform, _ = create_reform(params, return_names=True)
    reformed = Microsimulation(reform)
    revenue = (
        baseline.calc("net_income").sum() - reformed.calc("net_income").sum()
    )
    # A reform that loses money funds no UBI rather than a negative one.
    UBI_amount = max(0, revenue / baseline.calc("people").sum())
    result = {"UBI": float(UBI_amount)}

    if USE_CACHE:
        blob.upload_from_string(json.dumps(result))
    gc.collect()
    duration = time() - start_time
    app.logger.info(f"UBI size calculation completed ({round(duration, 2)}s)")
    return result
def impute_incomes(dataset: Dataset = FRS, year: int = 2022) -> MicroDataFrame:
    """Imputation of high incomes from the SPI.

    Trains ``si.rf_impute`` on the SPI (``PREDICTORS`` as inputs,
    ``IMPUTATIONS`` as targets) and applies it to the ``PREDICTORS`` of the
    given dataset. Region names are replaced in both tables by their position
    (as a float) in the list of unique SPI regions.

    Args:
        dataset (type): The dataset to impute onto. Defaults to FRS.
        year (int): The year of that dataset to use. Defaults to 2022.

    Returns:
        MicroDataFrame: The output of ``si.rf_impute`` for the dataset's
            predictor values.
    """
    from openfisca_uk import Microsimulation

    # Most recent SPI used - if it's before the FRS year then data will be uprated
    # automatically by OpenFisca-UK
    spi = Microsimulation(dataset=SPI)
    frs = Microsimulation(dataset=dataset, year=year)

    regions = spi.calc("region").unique()
    spi_df = spi.df(PREDICTORS + IMPUTATIONS)
    frs_df = frs.df(PREDICTORS)

    # Both tables share a single name -> code mapping so their codes agree.
    region_codes = {name: float(i) for i, name in enumerate(regions)}
    frs_df.region = frs_df.region.map(region_codes)
    spi_df.region = spi_df.region.map(region_codes)

    training_inputs = spi_df.drop(IMPUTATIONS, axis=1)
    training_targets = spi_df[IMPUTATIONS]
    return si.rf_impute(
        x_train=training_inputs,
        y_train=training_targets,
        x_new=frs_df,
        verbose=True,
    )
def remove_zero_weight_households(dataset: Dataset, year: int):
    """Removes zero-weight households (and benefit units and people) from a
    year of the given dataset.

    Each stored variable that the model knows about is filtered to the rows
    whose entity weight is positive in at least one year from ``year``
    through 2026. Stored variables unknown to the model are left untouched.

    Args:
        dataset (Dataset): The dataset to edit.
        year (int): The year of the dataset to edit.
    """
    from openfisca_uk import Microsimulation

    sim = Microsimulation(dataset=dataset, year=year)
    system_variables = sim.simulation.tax_benefit_system.variables

    # To be removed, households must have zero weight in all of these years
    YEARS = list(range(year, 2027))

    # The mask depends only on the entity, so build it once per entity
    # instead of once per variable.
    masks = {}

    def nonzero_weight_mask(entity):
        """Boolean mask of ``entity`` rows with positive weight in any year."""
        if entity not in masks:
            total_weight = sum(
                [
                    sim.calc(f"{entity}_weight", period=weight_year).values
                    for weight_year in YEARS
                ]
            )
            masks[entity] = total_weight > 0
        return masks[entity]

    for variable in dataset.keys(year):
        if variable not in system_variables:
            continue
        entity = system_variables[variable].entity.key
        has_nonzero_weight = nonzero_weight_mask(entity)
        if dataset.data_format == Dataset.ARRAYS:
            dataset.save(
                year,
                variable,
                dataset.load(year, variable)[has_nonzero_weight],
            )
        elif dataset.data_format == Dataset.TIME_PERIOD_ARRAYS:
            # One stored array per time period under each variable.
            for period in dataset.load(year, variable):
                key = f"{variable}/{period}"
                dataset.save(
                    year, key, dataset.load(year, key)[has_nonzero_weight]
                )
from openfisca_uk import Microsimulation
import numpy as np
from tqdm import tqdm

# Script: write a Markdown report ("variable_stats.md") describing every
# variable in the model's tax-benefit system. Numeric and boolean variables
# get summary statistics for 2020; all others are listed as categorical.

sim = Microsimulation()

sections = []
for name, var in tqdm(
    sim.simulation.tax_benefit_system.variables.items(),
    desc="Generating descriptions",
):
    # Calculated for every variable, including those reported as categorical.
    values = sim.calc(name, 2020)
    is_numeric = var.value_type in (float, bool, int)
    if not is_numeric:
        sections.append(
            f"\n- {name}:\n"
            f" - Type: Categorical\n"
            f" - Entity: {var.entity.key}\n"
            f" - Description: {var.label}\n\n"
        )
        continue
    sections.append(
        f"\n- {name}:\n"
        f" - Type: {var.value_type.__name__}\n"
        f" - Entity: {var.entity.key}\n"
        f" - Description: {var.label}\n"
        f" - Mean: {values.mean()}\n"
        f" - Median: {values.median()}\n"
        f" - Stddev: {values.std()}\n"
        f" - Non-zero count: {(values > 0).sum()}\n\n"
    )

text = (
    "# OpenFisca-UK Variable Statistics\n\nAll statistics generated from the uprated (to 2020) 2018-19 Family Resources Survey, with simulation turned on.\n\n"
    + "".join(sections)
)

with open("variable_stats.md", "w+") as f:
    f.write(text)
def generate(self, year: int):
    """Generate the LCFS-based dataset file for ``year``.

    If ``year`` is later than the newest raw LCFS year, the most recent
    generated year is loaded into a microsimulation and every stored variable
    is written out as calculated by that simulation (generating the newest
    raw year first if nothing exists yet). Otherwise the dataset is built
    from the raw LCFS household and person tables: weekly spending by
    category is annualised and pivoted to one column per category, joined
    with renamed household and person variables, and written with each
    household treated as a single person.

    Args:
        year (int): The year to generate.

    Raises:
        FileNotFoundError: If no raw LCFS year is available.
    """
    # Normalise before the membership test so a year passed as a string
    # still matches (and replaces) an existing integer year.
    year = int(year)
    if year in self.years:
        self.remove(year)

    if len(RawLCFS.years) == 0:
        raise FileNotFoundError(
            "Raw LCFS not found. Please run `openfisca-uk data lcfs generate [year]` first."
        )

    if year > max(RawLCFS.years):
        logging.warning("Uprating a previous version of the LCFS.")
        if len(self.years) == 0:
            self.generate(max(RawLCFS.years))
        if len(self.years) > 0:
            lcfs_year = max(self.years)
            from openfisca_uk import Microsimulation

            sim = Microsimulation(dataset=self, year=lcfs_year)
            # The context manager closes the file even if a calculation fails.
            with h5py.File(self.file(year), mode="w") as lcfs:
                for variable in self.keys(lcfs_year):
                    lcfs[variable] = sim.calc(variable).values
            return

    # Load raw LCFS tables
    # NOTE(review): the raw year and table names are fixed to 2019 regardless
    # of `year` - confirm this is intended.
    households = RawLCFS.load(2019, "lcfs_2019_dvhh_ukanon")
    people = RawLCFS.load(2019, "lcfs_2019_dvper_ukanon201920")

    # Long format: one row per (category, household) pair.
    spending = (
        households[list(CATEGORY_NAMES.keys())].unstack().reset_index()
    )
    spending.columns = "category", "household", "spending"
    spending["household"] = households.case[spending.household].values
    households = households.set_index("case")
    spending.category = spending.category.map(CATEGORY_NAMES).map(
        name_to_variable_name
    )
    spending.spending *= 52
    spending["weight"] = households.weighta[spending.household].values * 100
    spending = pd.DataFrame(spending)

    # One column per category, holding that row's spending if it belongs to
    # the category and zero otherwise, so a per-household sum pivots the table.
    for category in spending.category.unique():
        spending[category] = (
            spending.category == category
        ) * spending.spending
    lcf_df = (
        pd.DataFrame(spending[["household", "weight"] + CATEGORY_VARIABLES])
        .groupby("household")
        .sum()
    )

    # Add in LCFS variables that also appear in the FRS-based microsimulation model
    lcf_household_vars = households[
        list(HOUSEHOLD_LCF_RENAMES.keys())
    ].rename(columns=HOUSEHOLD_LCF_RENAMES)
    lcf_person_vars = (
        people[list(PERSON_LCF_RENAMES) + ["case"]]
        .rename(columns=PERSON_LCF_RENAMES)
        .groupby("case")
        .sum()
    )
    lcf_with_demographics = pd.concat(
        [
            lcf_df,
            lcf_household_vars,
            lcf_person_vars,
        ],
        axis=1,
    )

    # LCFS incomes are weekly - convert to annual
    for variable in PERSON_LCF_RENAMES.values():
        lcf_with_demographics[variable] *= 52
    lcf_with_demographics.region = lcf_with_demographics.region.map(REGIONS)

    lcfs = lcf_with_demographics.sort_index()
    lcfs = lcfs.rename(columns=dict(weight="household_weight"))
    entity_index = lcfs.index.values  # One-person households for simplicity for now

    with h5py.File(self.file(year), mode="w") as f:
        for entity_id_var in [
            "person_id",
            "benunit_id",
            "household_id",
            "person_benunit_id",
            "person_household_id",
        ]:
            f[entity_id_var] = entity_index
        f["person_benunit_role"] = ["adult"] * len(entity_index)
        f["person_household_role"] = ["adult"] * len(entity_index)
        f["person_state_id"] = [1] * len(entity_index)
        f["state_id"] = [1]
        for variable in lcfs.columns:
            f[variable] = lcfs[variable].values
def get_calculator_output(baseline, year, reform=None, data=None):
    """
    Build an OpenFisca-UK Microsimulation for the given policy and collect
    household-level tax data for ``year``.

    The simulation is always constructed with ``year=2019`` and the
    ``dataset`` name taken from the enclosing scope; ``year`` is used as the
    calculation period.

    Args:
        baseline (boolean): True if baseline tax policy
        year (int): year of data to simulate
        reform (OpenFisca Reform object): policy reform, None if baseline
        data (DataFrame or str): accepted for interface compatibility; not
            read by this function

    Returns:
        tax_dict (dict): a dictionary of microdata with marginal tax
            rates and other information computed from OpenFisca-UK

    Raises:
        RuntimeError: if ``year`` is later than ``DATA_LAST_YEAR``
    """
    # create a simulation
    sim_kwargs = dict(dataset=dataset, year=2019)
    if reform is None:
        sim = Microsimulation(**sim_kwargs)
        reform = ()
    else:
        sim = Microsimulation(reform, **sim_kwargs)

    if baseline:
        print("Running current law policy baseline")
    else:
        print("Baseline policy is: ", reform)

    # Check that start_year is appropriate
    if year > DATA_LAST_YEAR:
        raise RuntimeError("Start year is beyond data extrapolation.")

    def household(variable, **options):
        """Calculate ``variable`` for ``year`` mapped to household level."""
        return sim.calc(variable, map_to="household", period=year, **options)

    def marginal_rate(variable):
        """Household MTRs with respect to ``variable``, relative to ``sim``."""
        return get_household_mtrs(
            reform,
            variable,
            period=year,
            baseline=sim,
            **sim_kwargs,
        )

    # define market income - taking expanded_income and excluding gov't
    # transfer benefits (floored at 1 so it can be used as a denominator)
    gross = household("gross_income").values
    transfers = household("benefits").values
    market_income = np.maximum(gross - transfers, 1)

    # Compute marginal tax rates (can only do on earned income now)
    # Put MTRs, income, tax liability, and other variables in dict
    length = sim.calc("household_weight").size
    tax_dict = {
        "mtr_labinc": marginal_rate("employment_income"),
        "mtr_capinc": marginal_rate("savings_interest_income"),
        "age": household("age", how="max"),
        "total_labinc": household("earned_income"),
        "total_capinc": market_income - household("earned_income"),
        "market_income": market_income,
        "total_tax_liab": household("income_tax"),
        "payroll_tax_liab": household("national_insurance"),
        "etr": (1 - household("net_income") / market_income).clip(-10, 1.5),
        "year": year * np.ones(length),
        "weight": sim.calc("household_weight", period=year),
    }
    return tax_dict
def generate(self, year: int):
    """Generate the FRS-based dataset file for ``year``.

    If ``year`` is later than the newest raw FRS year, the most recent
    generated year is loaded into a microsimulation and every stored variable
    is written out as calculated by that simulation (generating the newest
    raw year first if nothing exists yet). Otherwise the raw FRS tables for
    ``year`` are loaded and the model's input variables are derived from them
    by the ``add_*`` helpers.

    Args:
        year (int): The year to generate.

    Raises:
        FileNotFoundError: If no raw FRS year is available.
    """
    # Normalise before the membership test so a year passed as a string
    # still matches (and replaces) an existing integer year.
    year = int(year)
    if year in self.years:
        self.remove(year)

    if len(RawFRS.years) == 0:
        raise FileNotFoundError(
            "Raw FRS not found. Please run `openfisca-uk data raw_frs generate [year]` first."
        )

    if year > max(RawFRS.years):
        logging.warning("Uprating a previous version of the FRS.")
        if len(self.years) == 0:
            self.generate(max(RawFRS.years))
        # Guard on this dataset's own years, matching the max() below.
        if len(self.years) > 0:
            frs_year = max(self.years)
            from openfisca_uk import Microsimulation

            sim = Microsimulation(dataset=self, year=frs_year)
            # The context manager closes the file even if a calculation fails.
            with h5py.File(self.file(year), mode="w") as frs:
                for variable in self.keys(frs_year):
                    frs[variable] = sim.calc(variable).values
            return

    # Load raw FRS tables
    TABLES = (
        "adult",
        "child",
        "accounts",
        "benefits",
        "job",
        "oddjob",
        "benunit",
        "househol",
        "chldcare",
        "pension",
        "maint",
        "mortgage",
        "penprov",
    )
    raw_frs_files = RawFRS.load(year)
    try:
        logging.info("Generating FRS dataset for year {}".format(year))
        logging.info("Loading FRS tables")
        (
            adult,
            child,
            accounts,
            benefits,
            job,
            oddjob,
            benunit,
            household,
            childcare,
            pension,
            maintenance,
            mortgage,
            pen_prov,
        ) = [raw_frs_files[table] for table in TABLES]
    finally:
        # Release the raw store whether or not every table could be read.
        raw_frs_files.close()

    logging.info("Joining adult and child tables")
    person = pd.concat([adult, child]).sort_index().fillna(0)

    # Generate OpenFisca-UK variables and save
    logging.info("Generating OpenFisca-UK variables")
    with h5py.File(self.file(year), mode="w") as frs:
        add_id_variables(frs, person, benunit, household)
        add_personal_variables(frs, person)
        add_benunit_variables(frs, benunit)
        add_household_variables(frs, household)
        add_market_income(
            frs, person, pension, job, accounts, household, oddjob, year
        )
        add_benefit_income(frs, person, benefits, household)
        add_expenses(
            frs,
            person,
            job,
            household,
            maintenance,
            mortgage,
            childcare,
            pen_prov,
        )
    logging.info("Completed FRS generation")