def decile_chart(baseline: Microsimulation, reformed: Microsimulation) -> dict:
    """Bar chart of the relative change in net income for each income decile.

    People are grouped by the decile rank of their baseline equivalised
    household net income. Within each group, the summed change in household
    net income is divided by the summed baseline household net income.

    :param baseline: Baseline microsimulation.
    :type baseline: Microsimulation
    :param reformed: Reform microsimulation.
    :type reformed: Microsimulation
    :return: Decile chart as a JSON representation of a Plotly chart.
    :rtype: dict
    """
    baseline_income = baseline.calc("household_net_income", map_to="person")
    ranking_income = baseline.calc("equiv_household_net_income",
                                   map_to="person")
    reform_income = reformed.calc("household_net_income", map_to="person")
    income_change = reform_income - baseline_income

    # Numerator and denominator are both aggregated over the same grouping.
    gain_by_decile = income_change.groupby(ranking_income.decile_rank()).sum()
    income_by_decile = baseline_income.groupby(
        ranking_income.decile_rank()).sum()
    relative_change = gain_by_decile / income_by_decile

    chart_data = pd.DataFrame({
        "Decile": relative_change.index,
        "Change": relative_change.values,
    })

    layout = dict(
        title="Change to net income by decile",
        xaxis_title="Equivalised disposable income decile",
        yaxis_title="Percentage change",
        yaxis_tickformat="%",
        showlegend=False,
        xaxis_tickvals=list(range(1, 11)),
    )
    fig = px.bar(chart_data, x="Decile", y="Change")
    fig.update_layout(**layout)
    fig.update_traces(marker_color=charts.BLUE)
    charts.add_zero_line(fig)
    return charts.formatted_fig_json(fig)
Пример #2
0
def migrate_to_universal_credit(dataset: Dataset = FRS,
                                year: int = 2022) -> Dict[str, ArrayLike]:
    """Move reported legacy benefit amounts onto Universal Credit.

    For every benefit in ``LEGACY_BENEFITS`` the reported amount is added to
    the reported Universal Credit amount and the legacy amount is replaced by
    zeros.

    Args:
        dataset (Dataset, optional): The dataset to use. Defaults to FRS.
        year (int, optional): The year to use. Defaults to 2022.

    Returns:
        Dict[str, ArrayLike]: Replacement values keyed by
            ``"<variable>/<year>"``.
    """
    from openfisca_uk import Microsimulation

    sim = Microsimulation(dataset=dataset, year=year)
    uc_key = f"universal_credit_reported/{year}"

    replaced = {
        uc_key: sim.calc("universal_credit_reported", period=year).values
    }

    for legacy_benefit in LEGACY_BENEFITS:
        legacy_amount = sim.calc(f"{legacy_benefit}_reported",
                                 period=year).values
        # Multiplying by zero keeps the array's shape while blanking it.
        replaced[f"{legacy_benefit}_reported/{year}"] = legacy_amount * 0
        replaced[uc_key] += legacy_amount

    return replaced
def spending(baseline: Microsimulation, reformed: Microsimulation) -> float:
    """Difference in summed net income between the reform and the baseline.

    :param baseline: Baseline microsimulation.
    :type baseline: Microsimulation
    :param reformed: Reform microsimulation.
    :type reformed: Microsimulation
    :return: Summed ``net_income`` under the reform minus the summed
        ``net_income`` under the baseline.
    :rtype: float
    """
    reform_total = reformed.calc("net_income").sum()
    baseline_total = baseline.calc("net_income").sum()
    return reform_total - baseline_total
def intra_decile_graph_data(baseline: Microsimulation,
                            reformed: Microsimulation) -> pd.DataFrame:
    """Data for the distribution of net income changes by decile and overall.

    The relative change in household net income is split into the bands
    bounded by +5%, +0.1%, -0.1% and -5%. The bands are paired with the
    entries of ``NAMES``. For each band, one fraction is produced per decile
    of baseline equivalised household net income, plus one "All" row.

    :param baseline: Baseline simulation.
    :type baseline: Microsimulation
    :param reformed: Reform simulation.
    :type reformed: Microsimulation
    :return: DataFrame with ``fraction``, ``decile`` and ``outcome`` columns
        (plus the ``index`` column created by ``reset_index``).
    :rtype: pd.DataFrame
    """
    equiv_income = baseline.calc("equiv_household_net_income",
                                 map_to="person")
    decile = equiv_income.decile_rank()
    income_before = baseline.calc("household_net_income", map_to="person")
    income_after = reformed.calc("household_net_income", map_to="person")
    # Rows where the ratio is NaN are dropped before any counting.
    rel_gain = ((income_after - income_before) / income_before).dropna()

    def in_band(values, lower, upper):
        """Keep the entries whose relative gain lies in (lower, upper]."""
        if lower is not None:
            values = values[rel_gain > lower]
        if upper is not None:
            values = values[rel_gain <= upper]
        return values

    thresholds = (None, 0.05, 1e-3, -1e-3, -0.05, None)
    frames = []
    for upper, lower, name in zip(thresholds[:-1], thresholds[1:], NAMES):
        fractions = []
        for j in range(1, 11):
            in_decile = rel_gain[decile == j]
            fractions.append(
                in_band(in_decile, lower, upper).count() / in_decile.count())
        frames.append(
            pd.DataFrame({
                "fraction": fractions,
                "decile": [str(j) for j in range(1, 11)],
                "outcome": name,
            }))
        # Same band, measured across everyone rather than one decile.
        overall = in_band(rel_gain, lower, upper)
        frames.append(
            pd.DataFrame({
                "fraction": [overall.count() / rel_gain.count()],
                "decile": "All",
                "outcome": name,
            }))
    return pd.concat(frames).reset_index()
Пример #5
0
def test_speed():
    """Time the import, construction and first calculation of the model.

    Prints a YAML mapping with the duration of each phase
    (``import_model``, ``init_model``, ``run_time``) and the overall
    ``total_time``, each formatted as seconds rounded to two decimals.
    """
    start_time = time()
    from openfisca_uk import Microsimulation

    import_time = time()
    sim = Microsimulation()
    init_time = time()
    sim.calc("household_net_income")
    calc_time = time()
    # Each phase is measured from the end of the previous one, so the three
    # phase durations add up to total_time instead of each including the
    # phases before it.
    durations = dict(
        import_model=import_time - start_time,
        init_model=init_time - import_time,
        run_time=calc_time - init_time,
        total_time=calc_time - start_time,
    )
    output = {key: f"{round(value, 2)}s" for key, value in durations.items()}
    print(yaml.dump(output))
Пример #6
0
def main(args):
    """Compute headline statistics and compare them with the saved summary.

    Each statistic is printed, showing ``old -> new`` where it differs from
    the value stored in ``docs/summary/summary.yaml``. When ``args.save`` is
    truthy the stored summary is overwritten with the new results.

    Args:
        args: Parsed command-line arguments; only ``args.save`` is read.
    """
    summary_path = "docs/summary/summary.yaml"
    year = 2022

    with open(summary_path, "r") as f:
        # safe_load returns None for an empty document; fall back to an empty
        # mapping so the comparison below still works.
        previous_results = yaml.safe_load(f) or {}

    sim = Microsimulation(dataset=EnhancedFRS, year=year)

    results = {
        "Poverty rate (BHC)":
        percentage(
            sim.calc("in_poverty_bhc", map_to="person", period=year).mean()),
        "Poverty rate (AHC)":
        percentage(
            sim.calc("in_poverty_ahc", map_to="person", period=year).mean()),
        "Income Tax revenue":
        gbp(sim.calc("income_tax", period=year).sum()),
        "National Insurance (employee-side) revenue":
        gbp(sim.calc("national_insurance", period=year).sum()),
        "Total income":
        gbp(sim.calc("total_income", period=year).sum()),
        "Benefit expenditure":
        gbp(sim.calc("benefits", period=year).sum()),
    }

    for key, value in results.items():
        previous_value = previous_results.get(key, "")
        if previous_value != value:
            print(f"{key}: {previous_value} -> {value}")
        else:
            print(f"{key}: {value}")

    if args.save:
        with open(summary_path, "w") as f:
            yaml.safe_dump(results, f)
def get_household_mtrs(
    reform: ReformType,
    variable: str,
    period: int = None,
    baseline: Microsimulation = None,
    **kwargs: dict,
) -> pd.Series:
    """Calculates household MTRs with respect to a given variable.

    The value of ``variable`` is raised by 1 for every adult in a second
    simulation. The MTR is the share of the resulting household-level
    increase that does not show up in household net income.

    Args:
        reform (ReformType): The reform to apply to the simulation.
        variable (str): The variable to increase.
        period (int): The period (year) to calculate the MTRs for.
        baseline (Microsimulation): Simulation to measure the change against.
            If None, one is built from ``reform`` and ``kwargs``.
        kwargs (dict): Additional arguments to pass to the simulation.

    Returns:
        pd.Series: The household MTRs, clipped to the range [0, 1].
    """
    # Test for None explicitly rather than relying on the truthiness of a
    # simulation object.
    if baseline is None:
        baseline = Microsimulation(reform, **kwargs)
    baseline_var = baseline.calc(variable, period)
    bonus = baseline.calc("is_adult", period) * 1  # Increase only adult values
    reformed = Microsimulation(reform, **kwargs)
    reformed.set_input(variable, period, baseline_var + bonus)

    household_bonus = reformed.calc(
        variable, map_to="household", period=period) - baseline.calc(
            variable, map_to="household", period=period)
    household_net_change = reformed.calc(
        "household_net_income", period=period) - baseline.calc(
            "household_net_income", period=period)
    mtr = (household_bonus - household_net_change) / household_bonus
    # Households with no bonus divide by zero; those entries become 0.
    mtr = mtr.replace([np.inf, -np.inf], np.nan).fillna(0).clip(0, 1)
    return mtr
Пример #8
0
def headline_metrics(baseline: Microsimulation,
                     reformed: Microsimulation) -> dict:
    """Compute headline society-wide metrics.

    :param baseline: Baseline simulation.
    :type baseline: Microsimulation
    :param reformed: Reform simulation.
    :type reformed: Microsimulation
    :return: Dictionary with net_cost (formatted), net_cost_numeric,
        poverty_change, winner_share, loser_share, and gini_change.
    :rtype: dict
    """
    income_after = reformed.calc("equiv_household_net_income",
                                 map_to="person")
    income_before = baseline.calc("equiv_household_net_income",
                                  map_to="person")
    income_gain = income_after - income_before

    reform_net_income = reformed.calc("net_income").sum()
    baseline_net_income = baseline.calc("net_income").sum()
    net_cost = reform_net_income - baseline_net_income

    baseline_poverty = baseline.calc("in_poverty_bhc", map_to="person").mean()
    reform_poverty = reformed.calc("in_poverty_bhc", map_to="person").mean()
    poverty_change = pct_change(baseline_poverty, reform_poverty)

    # Gains and losses of at most 1 count as neither winning nor losing.
    winner_share = (income_gain > 1).mean()
    loser_share = (income_gain < -1).mean()

    gini_change = pct_change(income_before.gini(), income_after.gini())

    return {
        "net_cost": gbp(net_cost),
        "net_cost_numeric": net_cost,
        "poverty_change": float(poverty_change),
        "winner_share": float(winner_share),
        "loser_share": float(loser_share),
        "gini_change": float(gini_change),
    }
Пример #9
0
def get_data():
    """Generate key datasets for UBI reforms.

    Builds a baseline simulation and a simulation of ``ubi_reform`` with all
    amounts set to zero. Returns household-level frames for both, together
    with the difference in summed household net income between them.

    Returns:
        DataFrame: Baseline DataFrame with core variables.
        DataFrame: UBI tax reform DataFrame with core variables.
        float: Baseline net income minus reform net income, summed over
            households (described in the original as the yearly revenue
            raised by the UBI tax reform).
    """

    def with_weights(micro_frame):
        """Convert to a plain DataFrame carrying a household_weight column."""
        plain = pd.DataFrame(micro_frame)
        plain["household_weight"] = micro_frame.weights
        return plain

    baseline = Microsimulation()
    baseline_columns = [
        column for column in BASELINE_COLS if column != "is_disabled_for_ubi"
    ]
    baseline_df = baseline.df(baseline_columns, map_to="household")

    zero_amounts = pd.Series([0] * 12, index=REGIONS)
    reform_no_ubi_sim = Microsimulation(ubi_reform(0, 0, 0, 0, zero_amounts))
    reform_base_df = reform_no_ubi_sim.df(BASELINE_COLS, map_to="household")

    baseline_total = baseline.calc("net_income", map_to="household").sum()
    reform_total = reform_no_ubi_sim.calc("net_income",
                                          map_to="household").sum()
    budget = baseline_total - reform_total

    baseline_df_pd = with_weights(baseline_df)
    reform_base_df_pd = with_weights(reform_base_df)
    return baseline_df_pd, reform_base_df_pd, budget
Пример #10
0
def generate_baseline_variables(dataset: Dataset, year: int):
    """
    Save baseline values of variables to a H5 dataset.

    For every variable named ``baseline_<name>`` in the tax-benefit system,
    the values of ``<name>`` are calculated for each year from ``year`` up to
    2025. They are then written back, alongside the existing contents, to the
    dataset file for ``year``.

    Args:
        dataset (Dataset): The dataset to read from and write back to.
        year (int): The year of the dataset to input the results in.
    """

    from openfisca_uk import Microsimulation

    prefix = "baseline_"
    target_years = list(range(year, 2026))
    baseline = Microsimulation(dataset=dataset)

    variable_metadata = baseline.simulation.tax_benefit_system.variables

    # Collect the underlying variable for every "baseline_"-prefixed name.
    variables = [
        variable_metadata[name[len(prefix):]]
        for name in variable_metadata.keys() if name.startswith(prefix)
    ]

    print(f"Found {len(variables)} variables to store baseline values for:")
    print("\n* " + "\n* ".join([variable.label for variable in variables]))

    contents = {}
    with dataset.load(year) as data:
        # Copy the existing arrays into memory before the file is rewritten.
        for name in data.keys():
            contents[name] = {
                time_period: data[name][time_period][...]
                for time_period in data[name].keys()
            }

        for variable in variables:
            contents[f"baseline_{variable.name}"] = {
                subyear: baseline.calc(variable.name, period=subyear).values
                for subyear in target_years
            }

    with h5py.File(dataset.file(year), "w") as f:
        for name, periods in contents.items():
            for time_period, values in periods.items():
                f[f"{name}/{time_period}"] = values
Пример #11
0
def ubi():
    """Handle a request for the UBI amount a reform could fund.

    The reform is built from the request's query arguments and JSON body. The
    revenue is the baseline's summed net income minus the reform's, and it is
    divided by the summed ``people`` variable, floored at zero. When
    ``USE_CACHE`` is set, results are stored in and served from ``bucket``
    under a key derived from the parameters and ``VERSION``.

    Returns:
        dict: ``{"UBI": <amount>}``.
    """
    start_time = time()
    app.logger.info("UBI size request received")
    params = {**request.args, **(request.json or {})}
    request_id = "ubi-" + dict_to_string(params) + "-" + VERSION
    blob = bucket.blob(request_id + ".json")
    # Test the flag first so the blob lookup (presumably a remote storage
    # call - confirm) is skipped entirely when caching is disabled.
    if USE_CACHE and blob.exists():
        app.logger.info("Returning cached response")
        result = json.loads(blob.download_as_string())
        return result
    reform, _ = create_reform(params, return_names=True)
    reformed = Microsimulation(reform)
    revenue = (baseline.calc("net_income").sum() -
               reformed.calc("net_income").sum())
    # A reform that loses revenue funds no UBI rather than a negative one.
    ubi_amount = max(0, revenue / baseline.calc("people").sum())
    result = {"UBI": float(ubi_amount)}
    if USE_CACHE:
        blob.upload_from_string(json.dumps(result))
    gc.collect()
    duration = time() - start_time
    app.logger.info(f"UBI size calculation completed ({round(duration, 2)}s)")
    return result
Пример #12
0
def impute_incomes(dataset: Dataset = FRS, year: int = 2022) -> MicroDataFrame:
    """Imputation of high incomes from the SPI.

    A model is trained on the SPI to predict ``IMPUTATIONS`` from
    ``PREDICTORS`` and is then applied to the predictors of the given
    dataset. Region names are converted to numeric codes in both frames
    first.

    Args:
        dataset (Dataset): The dataset to impute incomes for. Defaults to
            FRS.
        year (int): The year of the dataset to use. Defaults to 2022.

    Returns:
        MicroDataFrame: The result of ``si.rf_impute`` for the dataset's
            predictor values.
    """
    from openfisca_uk import Microsimulation

    # Most recent SPI used - if it's before the FRS year then data will be
    # uprated automatically by OpenFisca-UK
    spi_sim = Microsimulation(dataset=SPI)
    frs_sim = Microsimulation(dataset=dataset, year=year)

    region_names = spi_sim.calc("region").unique()

    training_df = spi_sim.df(PREDICTORS + IMPUTATIONS)
    target_df = frs_sim.df(PREDICTORS)

    # Both frames share one name -> code mapping, taken from the SPI regions.
    region_codes = {
        name: float(code)
        for code, name in enumerate(region_names)
    }
    target_df.region = target_df.region.map(region_codes)
    training_df.region = training_df.region.map(region_codes)

    return si.rf_impute(
        x_train=training_df.drop(IMPUTATIONS, axis=1),
        y_train=training_df[IMPUTATIONS],
        x_new=target_df,
        verbose=True,
    )
Пример #13
0
def remove_zero_weight_households(dataset: Dataset, year: int):
    """Removes zero-weight households (and benefit units and people) from a year of the given dataset.

    Every stored variable that is known to the tax-benefit system is filtered
    with a mask for its entity. The mask keeps the rows whose
    ``<entity>_weight`` sums to more than zero across the years from ``year``
    up to 2026. Variables the tax-benefit system does not know are left
    untouched.

    Args:
        dataset (Dataset): The dataset to edit.
        year (int): The year of the dataset to edit.
    """

    from openfisca_uk import Microsimulation

    sim = Microsimulation(dataset=dataset, year=year)
    system_variables = sim.simulation.tax_benefit_system.variables

    # To be removed, households must have zero weight in all of these years
    YEARS = list(range(year, 2027))

    variables = dataset.keys(year)

    # The mask only depends on the entity, so build it once per entity
    # instead of once per variable.
    weight_masks = {}

    for variable in variables:
        if variable not in system_variables:
            continue
        entity = system_variables[variable].entity.key
        if entity not in weight_masks:
            # A distinct loop name avoids shadowing the `year` parameter.
            weight_masks[entity] = (sum(
                sim.calc(f"{entity}_weight", period=weight_year).values
                for weight_year in YEARS) > 0)
        has_nonzero_weight = weight_masks[entity]
        if dataset.data_format == Dataset.ARRAYS:
            dataset.save(
                year,
                variable,
                dataset.load(year, variable)[has_nonzero_weight],
            )
        elif dataset.data_format == Dataset.TIME_PERIOD_ARRAYS:
            for period in dataset.load(year, variable):
                key = f"{variable}/{period}"
                dataset.save(year, key,
                             dataset.load(year, key)[has_nonzero_weight])
Пример #14
0
from openfisca_uk import Microsimulation
import numpy as np
from tqdm import tqdm

sim = Microsimulation()

# The report is collected piece by piece and joined once at the end.
sections = [
    "# OpenFisca-UK Variable Statistics\n\nAll statistics generated from the uprated (to 2020) 2018-19 Family Resources Survey, with simulation turned on.\n\n"
]

for name, var in tqdm(
    sim.simulation.tax_benefit_system.variables.items(),
    desc="Generating descriptions",
):
    values = sim.calc(name, 2020)
    if var.value_type not in (float, bool, int):
        # Non-numeric variables get a description only, no statistics.
        sections.append(
            f"\n- {name}:\n  - Type: Categorical\n  - Entity: {var.entity.key}\n  - Description: {var.label}\n\n"
        )
        continue
    sections.append(
        f"\n- {name}:\n  - Type: {var.value_type.__name__}\n  - Entity: {var.entity.key}\n  - Description: {var.label}\n  - Mean: {values.mean()}\n  - Median: {values.median()}\n  - Stddev: {values.std()}\n  - Non-zero count: {(values > 0).sum()}\n\n"
    )

text = "".join(sections)

with open("variable_stats.md", "w+") as f:
    f.write(text)
Пример #15
0
    def generate(self, year: int):
        """Generate the LCFS-based dataset file for the given year.

        If ``year`` is later than the latest raw LCFS year, the file is
        produced by running a simulation on the latest already-generated year
        and storing the calculated value of each of its variables. Otherwise
        the file is built from the raw LCFS household and person tables.

        Args:
            year (int): The year to generate.

        Raises:
            FileNotFoundError: If no raw LCFS years are available.
        """
        if year in self.years:
            self.remove(year)
        year = int(year)

        if len(RawLCFS.years) == 0:
            raise FileNotFoundError(
                "Raw LCFS not found. Please run `openfisca-uk data lcfs generate [year]` first."
            )

        if year > max(RawLCFS.years):
            logging.warning("Uprating a previous version of the LCFS.")
            if len(self.years) == 0:
                self.generate(max(RawLCFS.years))
            if len(self.years) > 0:
                lcfs_year = max(self.years)
                from openfisca_uk import Microsimulation

                sim = Microsimulation(dataset=self, year=lcfs_year)
                # The context manager closes the file even if a calculation
                # raises part-way through.
                with h5py.File(self.file(year), mode="w") as lcfs:
                    for variable in self.keys(lcfs_year):
                        lcfs[variable] = sim.calc(variable).values
                return
        # NOTE(review): the raw tables are always read from 2019, regardless
        # of `year` - confirm this is intended.
        households = RawLCFS.load(2019, "lcfs_2019_dvhh_ukanon")
        people = RawLCFS.load(2019, "lcfs_2019_dvper_ukanon201920")
        # Reshape the spending columns to one row per (category, household).
        spending = (households[list(
            CATEGORY_NAMES.keys())].unstack().reset_index())
        spending.columns = "category", "household", "spending"
        spending["household"] = households.case[spending.household].values
        households = households.set_index("case")
        spending.category = spending.category.map(CATEGORY_NAMES).map(
            name_to_variable_name)
        # presumably weekly -> annual, as for incomes below; confirm
        spending.spending *= 52
        spending["weight"] = (households.weighta[spending.household].values *
                              100)
        spending = pd.DataFrame(spending)

        # One column per category, zero outside that category's rows, so a
        # groupby-sum yields per-household totals for every category.
        for category in spending.category.unique():
            spending[category] = (spending.category
                                  == category) * spending.spending

        lcf_df = (pd.DataFrame(
            spending[["household", "weight"] +
                     CATEGORY_VARIABLES]).groupby("household").sum())

        # Add in LCFS variables that also appear in the FRS-based microsimulation model

        lcf_household_vars = households[list(
            HOUSEHOLD_LCF_RENAMES.keys())].rename(
                columns=HOUSEHOLD_LCF_RENAMES)
        lcf_person_vars = (people[list(PERSON_LCF_RENAMES) + ["case"]].rename(
            columns=PERSON_LCF_RENAMES).groupby("case").sum())

        lcf_with_demographics = pd.concat(
            [
                lcf_df,
                lcf_household_vars,
                lcf_person_vars,
            ],
            axis=1,
        )

        # LCFS incomes are weekly - convert to annual
        for variable in PERSON_LCF_RENAMES.values():
            lcf_with_demographics[variable] *= 52

        lcf_with_demographics.region = lcf_with_demographics.region.map(
            REGIONS)
        lcfs = lcf_with_demographics.sort_index()

        lcfs = lcfs.rename(columns=dict(weight="household_weight"))

        entity_index = (lcfs.index.values
                        )  # One-person households for simplicity for now

        with h5py.File(self.file(year), mode="w") as f:
            # Every entity shares one index, so each household is stored as a
            # single person in a single benefit unit.
            for entity_id_var in [
                    "person_id",
                    "benunit_id",
                    "household_id",
                    "person_benunit_id",
                    "person_household_id",
            ]:
                f[entity_id_var] = entity_index

            f["person_benunit_role"] = ["adult"] * len(entity_index)
            f["person_household_role"] = ["adult"] * len(entity_index)
            f["person_state_id"] = [1] * len(entity_index)
            f["state_id"] = [1]

            for variable in lcfs.columns:
                f[variable] = lcfs[variable].values
def get_calculator_output(baseline, year, reform=None, data=None):
    """
    Build an OpenFisca Microsimulation for the given policy and collect
    household-level tax, income and marginal tax rate data from it.

    Args:
        baseline (boolean): True if baseline tax policy; only affects the
            message that is printed
        year (int): year of data to simulate
        reform (OpenFisca Reform object): IIT policy reform parameters,
            None if baseline
        data (DataFrame or str): not used in this function body

    Returns:
        tax_dict (dict): a dictionary of microdata with marginal tax
            rates and other information computed from OpenFisca-UK

    Raises:
        RuntimeError: if ``year`` is later than ``DATA_LAST_YEAR``

    """
    # NOTE(review): `dataset` is neither a parameter nor a local, so it is
    # presumably a module-level name - confirm. The simulation year is fixed
    # at 2019 irrespective of `year`.
    sim_kwargs = dict(dataset=dataset, year=2019)
    if reform is not None:
        sim = Microsimulation(reform, **sim_kwargs)
    else:
        sim = Microsimulation(**sim_kwargs)
        reform = ()
    if not baseline:
        print("Baseline policy is: ", reform)
    else:
        print("Running current law policy baseline")

    # Check that start_year is appropriate
    if year > DATA_LAST_YEAR:
        raise RuntimeError("Start year is beyond data extrapolation.")

    def household_values(variable, **calc_kwargs):
        """Calculate ``variable`` for ``year``, mapped to households."""
        return sim.calc(variable,
                        map_to="household",
                        period=year,
                        **calc_kwargs)

    # define market income - gross income excluding gov't transfer
    # benefits, floored at 1
    market_income = np.maximum(
        household_values("gross_income").values -
        household_values("benefits").values,
        1,
    )

    # Compute marginal tax rates (can only do on earned income now)
    mtr_kwargs = dict(period=year, baseline=sim, **sim_kwargs)

    # Put MTRs, income, tax liability, and other variables in dict
    length = sim.calc("household_weight").size
    tax_dict = {}
    tax_dict["mtr_labinc"] = get_household_mtrs(reform, "employment_income",
                                                **mtr_kwargs)
    tax_dict["mtr_capinc"] = get_household_mtrs(reform,
                                                "savings_interest_income",
                                                **mtr_kwargs)
    tax_dict["age"] = household_values("age", how="max")
    tax_dict["total_labinc"] = household_values("earned_income")
    tax_dict["total_capinc"] = market_income - household_values(
        "earned_income")
    tax_dict["market_income"] = market_income
    tax_dict["total_tax_liab"] = household_values("income_tax")
    tax_dict["payroll_tax_liab"] = household_values("national_insurance")
    tax_dict["etr"] = (1 - (household_values("net_income")) /
                       market_income).clip(-10, 1.5)
    tax_dict["year"] = year * np.ones(length)
    tax_dict["weight"] = sim.calc("household_weight", period=year)

    return tax_dict
Пример #17
0
    def generate(self, year: int):
        """Generate the FRS dataset file for the given year.

        If ``year`` is later than the latest raw FRS year, the file is
        produced by running a simulation on the latest already-generated year
        and storing the calculated value of each of its variables. Otherwise
        the file is built from the raw FRS tables for ``year``.

        Args:
            year (int): The year to generate.

        Raises:
            FileNotFoundError: If no raw FRS years are available.
        """
        if year in self.years:
            self.remove(year)
        year = int(year)

        if len(RawFRS.years) == 0:
            raise FileNotFoundError(
                "Raw FRS not found. Please run `openfisca-uk data raw_frs generate [year]` first."
            )

        if year > max(RawFRS.years):
            logging.warning("Uprating a previous version of the FRS.")
            if len(self.years) == 0:
                self.generate(max(RawFRS.years))
            # Check this dataset's own years, matching the max(self.years)
            # call that follows.
            if len(self.years) > 0:
                frs_year = max(self.years)
                from openfisca_uk import Microsimulation

                sim = Microsimulation(dataset=self, year=frs_year)
                # The context manager closes the file even if a calculation
                # raises part-way through.
                with h5py.File(self.file(year), mode="w") as frs:
                    for variable in self.keys(frs_year):
                        frs[variable] = sim.calc(variable).values
                return

        # Load raw FRS tables
        raw_frs_files = RawFRS.load(year)
        with h5py.File(self.file(year), mode="w") as frs:
            logging.info("Generating FRS dataset for year {}".format(year))
            logging.info("Loading FRS tables")
            TABLES = (
                "adult",
                "child",
                "accounts",
                "benefits",
                "job",
                "oddjob",
                "benunit",
                "househol",
                "chldcare",
                "pension",
                "maint",
                "mortgage",
                "penprov",
            )
            try:
                (
                    adult,
                    child,
                    accounts,
                    benefits,
                    job,
                    oddjob,
                    benunit,
                    household,
                    childcare,
                    pension,
                    maintenance,
                    mortgage,
                    pen_prov,
                ) = [raw_frs_files[table] for table in TABLES]
            finally:
                # Release the raw tables' handle even if one is missing.
                raw_frs_files.close()

            logging.info("Joining adult and child tables")

            person = pd.concat([adult, child]).sort_index().fillna(0)

            # Generate OpenFisca-UK variables and save
            logging.info("Generating OpenFisca-UK variables")
            add_id_variables(frs, person, benunit, household)
            add_personal_variables(frs, person)
            add_benunit_variables(frs, benunit)
            add_household_variables(frs, household)
            add_market_income(frs, person, pension, job, accounts, household,
                              oddjob, year)
            add_benefit_income(frs, person, benefits, household)
            add_expenses(
                frs,
                person,
                job,
                household,
                maintenance,
                mortgage,
                childcare,
                pen_prov,
            )
        logging.info("Completed FRS generation")