Example #1
import pandas as pd
import weightedcalcs as wc


def weighted_percentile(universe, q=0.5):
    df = pd.DataFrame({
        'v': universe.MeanCoverage.tolist(),
        'w': (universe.End - universe.Start).tolist()
    })
    calc = wc.Calculator('w')  # w designates weight
    return calc.quantile(df, 'v', q)
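
# Usage sketch on hypothetical toy data (the real `universe` presumably holds
# coverage intervals with Start/End/MeanCoverage columns): the interval length
# End - Start is used as the weight, so longer intervals count for more.
universe = pd.DataFrame({
    "Start": [0, 100, 250],
    "End": [100, 250, 1000],
    "MeanCoverage": [30.0, 12.0, 5.0],
})
print(weighted_percentile(universe))       # weighted median coverage
print(weighted_percentile(universe, 0.9))  # 90th weighted percentile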
Example #2
import pandas as pd
import weightedcalcs as wc
from numpy import linspace, zeros
from numpy import logical_and as and_  # assumed: bare `and_` here means element-wise AND


def weightedcalcs_quantiles(data, labels, weights, return_quantiles=False):
    calc = wc.Calculator("weights")
    num_categories = len(labels)
    breaks = linspace(0, 1, num_categories + 1)
    data_frame = pd.DataFrame({
        'weights': weights,
        'data': data,
    })
    quantiles = [
        calc.quantile(data_frame, 'data', mybreak) for mybreak in breaks[1:]
    ]

    ret = zeros(len(data))
    for i in range(0, len(quantiles) - 1):
        lower = quantiles[i]
        upper = quantiles[i + 1]
        ret[and_(data > lower, data <= upper)] = labels[i]

    if return_quantiles:
        return ret + 1, quantiles
    else:
        return ret + 1
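
# Usage sketch (toy data): bin values into weighted terciles. Because of the
# trailing "+ 1", values at or below the first break keep the default 0 and
# come back as 1, while upper bins come back as labels[i] + 1, so passing
# labels = [1, 2, ..., n] yields bin indices 1..n.
import numpy as np

toy_data = np.array([10.0, 20.0, 30.0, 40.0, 50.0, 60.0])
toy_weights = np.array([2.0, 1.0, 1.0, 2.0, 1.0, 3.0])
print(weightedcalcs_quantiles(toy_data, labels=[1, 2, 3], weights=toy_weights))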
Example #3
from numpy import ones
import pandas as pd
import weightedcalcs as wc


def top_share(values, rank_from_top, weights=None):
    """Share of the weighted total of `values` held by the top of the distribution.

    Args:
      values(np.array): Vector of values
      rank_from_top(float): Rank from top (bottom is 1 and top is 0)
      weights(np.array): Weights vector (Default value = None)

    Returns:
      float: Share of the weighted total accruing to observations at or above
        the (1 - rank_from_top) weighted quantile; e.g. rank_from_top=0.1
        gives the top-decile share.

    """
    if weights is None:
        weights = ones(len(values))

    calc = wc.Calculator("weights")
    data_frame = pd.DataFrame({
        'weights': weights,
        'data': values,
    })
    quantile = calc.quantile(data_frame, 'data', 1 - rank_from_top)
    return ((data_frame["data"] >= quantile) * data_frame["data"] *
            data_frame["weights"]).sum() / (data_frame["data"] *
                                            data_frame["weights"]).sum()
Example #4
        list(itertools.product(*[occupancy_types, income_strata])),
        list(itertools.product(*[occupancy_types, ["COUNT"], income_strata])))
}

long_lead_data.rename(mapper=count_rename_dict, axis=1, inplace=True)
long_lead_data.columns = long_lead_data.columns.str.split(' ', expand=True)

long_lead_data.columns.rename(names=['OCCUPANCY TYPE', 'INCOME STRATA'],
                              level=[0, 2],
                              inplace=True)

wide_lead_data = long_lead_data.stack(level=[0, 2])
wide_lead_data["ENERGY BURDEN"] = 12 * (
    wide_lead_data["ELEP"] + wide_lead_data["GASP"] +
    wide_lead_data["FULP"]) / wide_lead_data["HINCP"]

features = [
    'TRACT', 'YBL INDEX', 'BLD INDEX', 'HFL INDEX', 'OCCUPANCY TYPE',
    'INCOME STRATA'
]

calc = wc.Calculator("COUNT")
nicely_grouped = wide_lead_data.groupby(features)
print("trying to calc")
print(calc.mean(nicely_grouped, "ELEP").head())

#fully_aggregated = tract_aggregated.groupby("TRACT").apply(wtavg)

#tract_aggregated.to_csv("tractnc2015_cleaned.csv")

#fully_aggregated.to_csv("tractnc2015_aggregated.csv")
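
# Sketch of the tract-level roll-up hinted at by the commented-out lines above
# (the original `wtavg` helper and `tract_aggregated` frame are not shown here):
# weightedcalcs also accepts grouped frames, so a COUNT-weighted mean of energy
# burden per tract can be taken directly.
tract_energy_burden = calc.mean(wide_lead_data.groupby("TRACT"), "ENERGY BURDEN")
print(tract_energy_burden.head())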
Example #5
# PITFALL: This depends on some variables that were removed from python/build/
# in commit 2c82f5faad432aa1971bd940341adef2bf73ea02

exec(open("tax-proposal/2020-08-21/build.py").read())

from python.common.util import near
import numpy as np
import weightedcalcs as weightLib
wc = weightLib.Calculator('weight')

# `qs` is a quantile- (specifically percentile-) level data frame.
qs = pd.DataFrame({"income q": np.arange(0, 1, 0.01)})
qs["income"] = (qs["income q"].apply(lambda q: wc.quantile(hh, "income", q)))
qs = pd.concat([
    qs,
    pd.DataFrame(data={
        "income": [np.inf],
        "income q": [1]
    }, index=[100])
])
qs["income q"] = (qs["income q"].apply(lambda x: round(100 * x)))
qs = qs["income"]

if True:

    def income_quantiles(incomes, thresholds):
        q = 0
        acc = incomes.copy()
        for i in range(0, len(incomes)):
            if q < thresholds.index.max():
                if incomes.iloc[i] >= thresholds.loc[q + 1]:
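
# Usage sketch for the `qs` series built above: its integer index (0-100) is
# the percentile and its values are the weighted income thresholds, with an
# inf sentinel appended at 100; for example, qs.loc[50] is the weighted median
# household income and qs.loc[90] the 90th-percentile threshold (this assumes
# `hh` and its "income" and "weight" columns come from build.py, per the
# PITFALL note above).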
Example #6
import unittest
import weightedcalcs as wc
import pandas as pd
import sys
import os

calc = wc.Calculator("weights")


class WCTest(unittest.TestCase):
    def test_mean(self):
        # Example via https://en.wikipedia.org/wiki/Weighted_arithmetic_mean
        assert (calc.mean(
            pd.DataFrame({
                "values": [80, 90],
                "weights": [20, 30],
            }), "values") == 86)

    def test_mean_non_pandas(self):
        # Example via https://en.wikipedia.org/wiki/Weighted_arithmetic_mean
        assert (calc.mean({
            "values": [80, 90],
            "weights": [20, 30],
        }, "values") == 86)

    def test_quantile(self):
        # Example via https://en.wikipedia.org/wiki/Weighted_median
        df = pd.DataFrame({
            "values": [0.1, 0.35, 0.05, 0.1, 0.15, 0.05, 0.2],
            "weights": [0.1, 0.35, 0.05, 0.1, 0.15, 0.05, 0.2],
        })
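
        # The truncated test follows the Wikipedia weighted-median example: the
        # values serve as their own weights, and the weighted median of this
        # data works out to 0.2, so a plausible assertion (a sketch, not
        # necessarily the original body) is:
        #     assert calc.quantile(df, "values", 0.5) == 0.2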
Example #7
                                            "21": "Puno","22":"San Martin","23":"Tacna","24":"Tumbes","25":"Ucayali"
                                            })
sumaria.head(2)

# Create the geographic "area" variable
sumaria["area"] = sumaria["estrato"].replace({"de 500,000 a más habitantes"       : "Urbana",
                                                "de 100,000 a 499,999 habitantes" : "Urbana",
                                                "de 50,000 a 99,999 habitantes"   : "Urbana",
                                                "de 20,000 a 49,999 habitantes"   : "Urbana",
                                                "de 2,000 a 19,999 habitantes"    : "Urbana",
                                                "de 500 a 1,999 habitantes"       : "Rural",
                                                "Área de empadronamiento rural (aer) simple"    : "Rural",
                                                "Área de empadronamiento rural (aer) compuesto" : "Rural"
                                                })
#sumaria["area"] = np.where(sumaria["estrato"] <= 5, "Urbana", "Rural")
sumaria["area"].value_counts()

# Build unweighted tables
pd.crosstab(sumaria["dpto"], sumaria["pc_pobre"])

# Build unweighted tables
pd.crosstab([sumaria["area"],sumaria["estrsocial"]], sumaria["pc_pobre"] , margins=True)

# Poverty rate using the expansion factor / weight
sumaria["facpop"] = sumaria["factor07"]*sumaria["mieperho"]
calc = wc.Calculator("facpop")

# Distribution of the "pc_pobre" variable
calc.distribution(sumaria, "pc_pobre").round(3).sort_values(ascending=False)
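
# Sketch (column names as built above): weightedcalcs also accepts grouped
# frames, so the weighted poverty distribution can be broken out by area.
calc.distribution(sumaria.groupby("area"), "pc_pobre").round(3)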