def test_weighted_quantile():
    """Check unweighted quantiles exactly and the weighted median loosely.

    Relies on the module-level fixtures ``X`` (values) and ``W`` (weights).
    """
    quantiles = [0, 0.5, 1]
    expected_unweighted = [1, 2, 5]

    unweighted = mdf.weighted_quantile(X, quantiles).tolist()
    assert expected_unweighted == unweighted

    weighted = mdf.weighted_quantile(X, quantiles, W).tolist()
    # A stricter check is still pending: the weighted result is not exactly
    # what stacking the values by weight would give (that would be
    # [1, 1, 5]).
    # See stackoverflow.com/q/21844024#comment102342137_29677616
    # Until then, only require the weighted median to fall below the
    # unweighted median.
    weighted_median = weighted[1]
    unweighted_median = unweighted[1]
    assert weighted_median < unweighted_median
def top_x_pct_share(df, col, top_x_pct, w=None):
    """Computes the share of a column's weighted total held by the top x%.

    Rows whose value is at or above the weighted ``1 - top_x_pct`` quantile
    of ``col`` count as the top group.

    :param df: DataFrame.
    :param col: Name of the value column in df.
    :param top_x_pct: Size of the top group as a decimal between 0 and 1,
        e.g. 0.1 or 0.001.
    :param w: Name of the weight column in df.
    :returns: Weighted sum of col over the top group divided by the
        weighted sum of col over all of df.
    """
    cutoff = mdf.weighted_quantile(df, col, w, 1 - top_x_pct)
    top_rows = df[df[col] >= cutoff]
    numerator = mdf.weighted_sum(top_rows, col, w)
    denominator = mdf.weighted_sum(df, col, w)
    return numerator / denominator
def top_x_pct_share(val, top_x_pct, w=None):
    """Calculates top x% share.

    Values at or above the weighted ``1 - top_x_pct`` quantile form the top
    group; the result is that group's weighted sum over the weighted sum of
    all values.

    Args:
        val: Value (list-like).
        top_x_pct: Decimal between 0 and 1 of the top %, e.g. 0.1, 0.001.
        w: Weight (list-like, same length as val). Weights are paired with
            values by position. Defaults to a weight of 1 for every value.

    Returns:
        The share of w-weighted val held by the top x%.

    Raises:
        ValueError: If w does not have the same length as val.
    """
    val = pd.Series(val)
    if w is None:
        w = np.ones(val.size)
    # Key the weights on val's own index. A fresh Series would get a default
    # RangeIndex, and when val carries any other index (e.g. a filtered
    # DataFrame column) the boolean selection below fails to align and the
    # element-wise product turns into NaN.
    w = pd.Series(np.asarray(w), index=val.index)
    threshold = mdf.weighted_quantile(val, 1 - top_x_pct, w)
    in_top = val >= threshold
    top_x_pct_sum = (val[in_top] * w[in_top]).sum()
    total_sum = (val * w).sum()
    return top_x_pct_sum / total_sum
def total_wealth_by_decile(data, measure):
    """Compute each decile's percentage share of total `measure`, by race.

    For every distinct value of ``data.race2``, that group's rows are split
    at the group's own weighted decile boundaries of `measure` (weights come
    from the "wgt" column). Each decile's weighted sum is then expressed as
    a percentage of the group's weighted total.

    Args:
        data: DataFrame with a ``race2`` column, a "wgt" weight column and
            the `measure` column.
        measure: Name of the column in `data` to distribute across deciles.

    Returns:
        DataFrame indexed by decile number (1 to 10) with one column per
        ``race2`` value holding that decile's percentage share. An empty
        DataFrame is returned when `data` has no ``race2`` values.
    """
    # Loop-invariant: 11 boundaries (0.0, 0.1, ..., 1.0) delimit 10 deciles.
    decile_bounds = np.arange(0, 1.1, 0.1)
    shares_by_race = {}
    for race2 in data.race2.unique():
        race_df = data[data.race2 == race2].copy(deep=True)
        deciles = np.asarray(
            mdf.weighted_quantile(race_df, measure, "wgt", decile_bounds))
        race_total = mdf.weighted_sum(race_df, measure, "wgt")
        decile_sums = []
        for lower, upper in zip(deciles[:-1], deciles[1:]):
            # Bin on the same column the boundaries were computed from;
            # binning on a fixed "networth" column would be wrong for any
            # other measure.
            # NOTE(review): between() includes both endpoints, so a row
            # sitting exactly on a boundary is counted in two adjacent
            # deciles. Kept as-is to preserve existing results; confirm
            # whether half-open bins are wanted.
            in_decile = race_df[measure].between(lower, upper)
            decile_sums.append(
                mdf.weighted_sum(race_df[in_decile], measure, "wgt"))
        # Convert explicitly so the division is element-wise even when the
        # total is a plain Python number rather than a NumPy scalar.
        shares_by_race[race2] = (np.array(decile_sums) / race_total) * 100
    if not shares_by_race:
        return pd.DataFrame()
    # Build the frame once instead of concatenating inside the loop.
    return pd.DataFrame(shares_by_race,
                        index=np.arange(1, decile_bounds.size))
def test_weighted_quantile():
    """Smoke test: the DataFrame form of weighted_quantile runs cleanly.

    Uses the module-level ``df`` with value column "x" and weight column
    "w". The result is converted to a list but not compared to anything.
    """
    quantiles = [0, 0.5, 1]
    result = mdf.weighted_quantile(df, "x", "w", quantiles)
    result.tolist()
def _top_x_pct_share(df, col, top_x_pct, w=None):
    """Return the top x% share of the weighted total of ``col``.

    The top group is every row of ``df`` whose ``col`` value is at or above
    the weighted ``1 - top_x_pct`` quantile; ``w`` names the weight column.
    """
    quantile = 1 - top_x_pct
    cutoff = mdf.weighted_quantile(df, col, w, quantile)
    is_top = df[col] >= cutoff
    return mdf.weighted_sum(df[is_top], col, w) / mdf.weighted_sum(df, col, w)