Exemplo n.º 1
0
def test_rank():
    """Check MicroSeries.rank() against hand-computed weighted ranks.

    In each case the expected rank of an element is the running total of
    weights when the elements are taken in ascending order of value.
    """
    cases = (
        # (values, weights, expected ranks)
        ([1, 2, 3], [4, 5, 6], [4, 9, 15]),
        ([3, 1, 2], [6, 4, 5], [15, 4, 9]),
        ([2, 1, 3], [5, 4, 6], [9, 4, 15]),
    )
    for values, weights, expected in cases:
        series = mdf.MicroSeries(values, weights=weights)
        ranks = series.rank().values
        assert np.array_equal(ranks, expected)
Exemplo n.º 2
0
def test_cumsum():
    """Check MicroSeries.cumsum() against hand-computed weighted sums.

    The expected values are the running totals of value * weight, taken in
    the order the elements were supplied.
    """
    cases = (
        # (values, weights, expected cumulative sums)
        ([1, 2, 3], [4, 5, 6], [4, 14, 32]),
        ([2, 1, 3], [5, 4, 6], [10, 14, 32]),
        ([3, 1, 2], [6, 4, 5], [18, 22, 32]),
    )
    for values, weights, expected in cases:
        series = mdf.MicroSeries(values, weights=weights)
        running = series.cumsum().values
        assert np.array_equal(running, expected)
Exemplo n.º 3
0
def test_top_pct():
    """Check top-share helpers on both the function and MicroSeries APIs."""
    # Unweighted: values 1 to 10 sum to 10 * 11 / 2 = 55, and the top 10%
    # is the single largest value, 10.
    values = list(range(1, 11))
    frame = pd.DataFrame({"x": values})
    series = mdf.MicroSeries(values)
    expected = 10 / 55
    assert mdf.top_10_pct_share(frame, "x") == expected
    assert series.top_10_pct_share() == expected

    # Weighted: using the values as their own weights is equivalent to the
    # expanded sample [1, 2, 2, 3, 3, 3]. Its sum is 14 and its top half
    # (3, 3, 3) sums to 9.
    values = list(range(1, 4))
    frame = pd.DataFrame({"x": values, "w": values})
    series = mdf.MicroSeries(values, weights=values)
    expected = 9 / 14
    assert mdf.top_50_pct_share(frame, "x", "w") == expected
    assert series.top_50_pct_share() == expected
Exemplo n.º 4
0
def test_series_getitem():
    """Indexing a MicroSeries must keep weights aligned with the values.

    Both list indexing and slicing are checked by comparing the weighted sum
    of the selection with the same selection applied to the raw arrays.
    """
    values = np.array([0, 1, 1])
    weights = np.array([3, 0, 9])
    series = mdf.MicroSeries(values, weights=weights)

    # Selection by a list of positions.
    positions = [1, 2]
    expected = np.sum(values[positions] * weights[positions])
    assert series[positions].sum() == expected

    # Selection by a slice.
    window = slice(1, 3)
    expected = np.sum(values[window] * weights[window])
    assert series[window].sum() == expected
Exemplo n.º 5
0
def test_mean():
    """MicroSeries.mean() must match numpy's weighted average.

    Also verifies that set_weights() raises when the weights are a different
    length from the values.
    """
    # Weights supplied at construction time.
    arr = np.array([3, 0, 2])
    w = np.array([4, 1, 1])
    series = mdf.MicroSeries(arr, weights=w)
    assert series.mean() == np.average(arr, weights=w)

    # Weights attached afterwards through set_weights().
    arr = np.linspace(-20, 100, 100)
    w = np.linspace(1, 3, 100)
    series = mdf.MicroSeries(arr)
    series.set_weights(w)
    assert series.mean() == np.average(arr, weights=w)

    # Verify that an error is thrown when passing weights of different size
    # from the values.
    w = np.linspace(1, 3, 101)
    series = mdf.MicroSeries(arr)
    try:
        series.set_weights(w)
    except Exception:
        pass
    else:
        # Raised outside the try body on purpose: AssertionError is itself an
        # Exception subclass, so raising it inside the try would be swallowed
        # by the handler above and the check could never fail.
        raise AssertionError(
            "set_weights accepted weights whose length differs from the values"
        )
Exemplo n.º 6
0
def test_sum():
    """MicroSeries.sum() must equal the sum of value * weight.

    Also verifies that set_weights() raises when the weights are a different
    length from the values.
    """
    # Weights supplied at construction time.
    arr = np.array([0, 1, 1])
    w = np.array([3, 0, 9])
    series = mdf.MicroSeries(arr, weights=w)
    assert series.sum() == (arr * w).sum()

    # Weights attached afterwards through set_weights().
    arr = np.linspace(-20, 100, 100)
    w = np.linspace(1, 3, 100)
    series = mdf.MicroSeries(arr)
    series.set_weights(w)
    assert series.sum() == (arr * w).sum()

    # Verify that an error is thrown when passing weights of different size
    # from the values.
    w = np.linspace(1, 3, 101)
    series = mdf.MicroSeries(arr)
    try:
        series.set_weights(w)
    except Exception:
        pass
    else:
        # Raised outside the try body on purpose: AssertionError is itself an
        # Exception subclass, so raising it inside the try would be swallowed
        # by the handler above and the check could never fail.
        raise AssertionError(
            "set_weights accepted weights whose length differs from the values"
        )
Exemplo n.º 7
0
 def calc(self, var, map_to=None, period="2020", verbose=False):
     """Calculate a variable, falling back to other period strategies.

     The variable is first computed with ``self.model.calculate``. If that
     raises, ``calculate_add`` is tried, and if that raises as well,
     ``calculate_divide`` is used (its exceptions propagate to the caller).
     The result is then passed through ``self.map_to`` from the variable's
     own entity to ``map_to``.

     Args:
         var: Name of the variable to calculate.
         map_to: Target entity key handed to ``self.map_to``; ``None`` is
             passed through unchanged.
         period: Period handed to the model's calculate methods.
         verbose: If true, print a message each time a fallback is tried.

     Returns:
         The mapped result as returned by ``self.map_to`` when
         ``self.use_microdf`` is falsy or ``self.weight_vars`` is ``None``;
         otherwise an ``mdf.MicroSeries`` weighted by
         ``self.weight_vars[map_to or entity]``.
     """
     # Only ordinary errors trigger a fallback; KeyboardInterrupt and
     # SystemExit must reach the caller instead of starting a retry.
     try:
         result = self.model.calculate(var, period)
     except Exception:
         if verbose:
             print(
                 f"Initial period calculation failed for {var}; attempting to gross up periods"
             )
         try:
             result = self.model.calculate_add(var, period)
         except Exception:
             if verbose:
                 print(
                     f"Grossing up period calculation failed for {var}; attempting to divide period"
                 )
             result = self.model.calculate_divide(var, period)
     entity = self.model.tax_benefit_system.variables[var].entity.key
     result = self.map_to(result, entity=entity, target_entity=map_to)
     if not self.use_microdf or self.weight_vars is None:
         return result
     return mdf.MicroSeries(
         result, weights=self.weight_vars[map_to or entity]
     )
Exemplo n.º 8
0
 def calc(
     self,
     var: str,
     period: Union[str, int] = 2020,
     weighted: bool = True,
     map_to: str = None,
     how: str = None,
     dp: int = 2,
 ) -> MicroSeries:
     """Calculate a variable and optionally wrap it as a weighted series.

     The variable is first computed with ``self.simulation.calculate``. If
     that raises, ``calculate_add`` is tried, then ``calculate_divide``; if
     all three fail, the exception from the first attempt is re-raised.

     Args:
         var: Name of the variable to calculate.
         period: Period handed to the simulation; ``None`` means
             ``self.year``.
         weighted: If false, return the raw array without weights (and
             without applying ``map_to``).
         map_to: Entity key to map the result to before weighting.
         how: Forwarded to ``self.map_to`` as its ``how`` argument.
         dp: Number of decimals that float-typed variables are rounded to.

     Returns:
         The calculated array when ``weighted`` is false, otherwise an
         ``mdf.MicroSeries`` weighted by ``self.entity_weights`` for the
         result's entity.

     Raises:
         KeyError: Presumably, if ``var`` is not a known variable (depends
             on the type of ``tax_benefit_system.variables``).
     """
     if period is None:
         period = self.year
     # Look the metadata up before the fallback chain: it is needed by the
     # fallbacks and by the post-processing below, so a failed lookup must
     # surface directly rather than as an unbound-variable error later on.
     var_metadata = self.simulation.tax_benefit_system.variables[var]
     try:
         arr = self.simulation.calculate(var, period)
     except Exception as e:
         try:
             arr = self.simulation.calculate_add(var, period)
             if var_metadata.value_type == bool:
                 # NOTE(review): 52 is presumably the number of weeks in a
                 # year, i.e. true only if true in every week - confirm.
                 arr = arr >= 52
         except Exception:
             try:
                 arr = self.simulation.calculate_divide(var, period)
             except Exception:
                 # Report the original failure, not the fallback's.
                 raise e
     if var_metadata.value_type == float:
         arr = arr.round(dp)
     if var_metadata.value_type == Enum:
         arr = arr.decode_to_str()
     if not weighted:
         return arr
     entity = var_metadata.entity.key
     if map_to:
         arr = self.map_to(arr, entity, map_to, how=how)
         entity = map_to
     return mdf.MicroSeries(arr, weights=self.entity_weights[entity])
Exemplo n.º 9
0
import pandas as pd
import pytest

import microdf as mdf

# Shared fixtures: three observations with values X and Y and weights W.
X = [1, 5, 2]
Y = [0, -6, 3]
W = [4, 1, 1]
df = pd.DataFrame({"x": X, "y": Y, "w": W})
ms = mdf.MicroSeries(X, weights=W)
md = mdf.MicroDataFrame(df[["x", "y"]], weights=W)
# Grouped variant: stack the original rows (group "a") on top of a scaled
# copy (group "b", with x doubled and y multiplied by 1.5).
df2 = df.copy(deep=True)
df2["x"] = df2["x"] * 2
df2["y"] = df2["y"] * 1.5
dfg = pd.concat([df, df2]).assign(g=["a"] * 3 + ["b"] * 3)
mdg = mdf.MicroDataFrame(dfg[["x", "y", "g"]], weights=W)


def test_weighted_quantile():
    """Smoke test: weighted_quantile runs and its result converts to a list.

    No values are asserted; the test only fails if a call raises.
    """
    quantiles = [0, 0.5, 1]
    result = mdf.weighted_quantile(df, "x", "w", quantiles)
    result.tolist()


def test_weighted_median():
    """Check the unweighted median and smoke-test the weighted variants."""
    # Without a weight column, the median of x is asserted exactly.
    unweighted = mdf.weighted_median(df, "x")
    assert unweighted == 2
    # With a weight column: only checked for running without error.
    mdf.weighted_median(df, "x", "w")
    # With weights and a grouping column: also only a smoke test.
    mdf.weighted_median(dfg, "x", "w", "g")
Exemplo n.º 10
0
def test_median():
    """The weighted median is the middle element of the expanded sample."""
    # Repeating each value by its weight gives
    # 1, 2, 3, 4, *4*, 4, 5, 5, 5, whose middle (fifth) element is 4.
    values = np.array([1, 2, 3, 4, 5])
    weights = np.array([1, 1, 1, 3, 3])
    weighted = mdf.MicroSeries(values, weights=weights)
    assert weighted.median() == 4
Exemplo n.º 11
0
def test_decile_rank_rank():
    """Check MicroSeries.decile_rank() against hand-computed deciles.

    The nine weights sum to 100, so an element's cumulative weight (taken in
    ascending value order) is its percentile. Value 4 has weight 20, which
    moves it from the 30th to the 50th percentile; that is why no element
    falls in decile 4.
    """
    values = [5, 4, 3, 2, 1, 6, 7, 8, 9]
    # One weight per value. The list previously carried a stray tenth entry,
    # which left the weights misaligned with the nine values.
    weights = [10, 20, 10, 10, 10, 10, 10, 10, 10]
    s = mdf.MicroSeries(values, weights=weights)
    assert np.array_equal(s.decile_rank().values, [6, 5, 3, 2, 1, 7, 8, 9, 10])
Exemplo n.º 12
0
def test_quintile_rank():
    """Check MicroSeries.quintile_rank() on weights that sum to 100."""
    # In ascending value order the cumulative weights are 60, 80 and 100.
    series = mdf.MicroSeries([4, 2, 3], weights=[20, 60, 20])
    quintiles = series.quintile_rank().values
    expected = [5, 3, 4]
    assert np.array_equal(quintiles, expected)
Exemplo n.º 13
0
def test_quartile_rank():
    """Check MicroSeries.quartile_rank() on weights that sum to 100."""
    # In ascending value order the cumulative weights are 50, 75 and 100.
    series = mdf.MicroSeries([4, 2, 3], weights=[25, 50, 25])
    quartiles = series.quartile_rank().values
    expected = [4, 2, 3]
    assert np.array_equal(quartiles, expected)
Exemplo n.º 14
0
def test_percentile_rank():
    """Check MicroSeries.percentile_rank() on weights that sum to 100."""
    # In ascending value order the cumulative weights are 20, 60, 80 and 100.
    series = mdf.MicroSeries([4, 2, 3, 1], weights=[20, 40, 20, 20])
    percentiles = series.percentile_rank().values
    expected = [100, 60, 80, 20]
    assert np.array_equal(percentiles, expected)