Exemplo n.º 1
0
def pooled_ols(panel_data, formula, weights=None, cov="unadjusted"):
    """
    Estimate a Pooled OLS model from a formula, print its summary and return the fit.

    The formula is handed to ``PooledOLS.from_formula`` unchanged, so remember to
    include an intercept explicitly ('y ~ 1 + x1 + ...') and to assign the return
    value to an object.

    :param panel_data : dataframe (which must be in a panel structure)
    :param formula : patsy formula
    :param weights : N x 1 Series or vector of estimation weights; defaults to None,
        in which case no ``weights`` argument is passed to the model at all.
        Recommended for survey data, using the weight shipped with the survey.
    :param cov : str, forwarded to ``fit`` as ``cov_type``
        unadjusted: common standard errors
        robust: robust standard errors
        kernel: robust to heteroskedasticity AND serial autocorrelation
        clustered: standard errors clustered by the entity column
    :return : the object returned by ``fit`` (the fitted results)
    """

    # Only forward ``weights`` when the caller actually supplied them.
    model_kwargs = {"formula": formula, "data": panel_data}
    if weights is not None:
        model_kwargs["weights"] = weights
    model = PooledOLS.from_formula(**model_kwargs)

    # 'clustered' additionally needs to be told to cluster on the entity;
    # every other value goes to ``fit`` as-is.
    if cov == 'clustered':
        fitted = model.fit(cov_type='clustered', cluster_entity=True)
    else:
        fitted = model.fit(cov_type=cov)

    # Show the summary table, then hand the results back to the caller.
    print(fitted.summary)
    return fitted
Exemplo n.º 2
0
def Reg_Painel_MQO_Agrupado(x, y, constante="S", cov="normal"):
    '''
    Fit a pooled OLS regression, print the results and return them.

    By default the model is estimated with an intercept and with conventional
    (unadjusted) standard errors.
    **IMPORTANT: for the panel to be well-formed, the data must be multi-indexed
    by individual and by time, in that order. Otherwise, transform the dataframe
    first using the 'Arrumar Painel' function.

    x: values of the independent variables;
    y: values of the dependent variable;
    constante: "S" for a regression with an intercept, any other value for a
        regression without one. When omitted, the intercept is included;
    cov: "normal" for conventional standard errors (default);
        "robust" for robust standard errors;
        "kernel" for standard errors robust to heteroskedasticity and serial
            autocorrelation;
        "cluster" or "clustered" for standard errors clustered by entity.
        Any other value falls back to conventional standard errors.

    Returns the fitted results. For backward compatibility the same object is
    also stored in the module-level global ``Resultado``.
    '''
    global Resultado

    # Build the regressor matrix, prepending a constant column when requested.
    if constante == "S":
        X = sm.add_constant(x)
    else:
        X = x

    # Create the model; the covariance estimator is chosen at fit time.
    Modelo = PooledOLS(y, X)

    if cov == "robust":
        Resultado = Modelo.fit(cov_type='robust')
    elif cov == 'kernel':  # robust to heteroskedasticity and serial autocorrelation
        Resultado = Modelo.fit(cov_type='kernel')
    elif cov in ('clustered', 'cluster'):
        Resultado = Modelo.fit(cov_type='clustered', cluster_entity=True)
    else:
        Resultado = Modelo.fit()

    print(Resultado)
    # Returning the results lets callers use them without reaching for the global.
    return Resultado
def get_pols(
    regression_variables: List[Tuple],
    data: Dict[str, pd.DataFrame],
    datasets: Dict[pd.DataFrame, Any],
) -> Tuple[DataFrame, Any, List[Any], Any]:
    """Fit a Pooled OLS model on the selected regression variables.

    PooledOLS is just plain OLS that understands various panel data
    structures. It is useful as a base model. [Source: LinearModels]

    Parameters
    ----------
    regression_variables : list
        The regression variables entered, where the first variable is
        the dependent variable.
    data : dict
        A dictionary containing the datasets.
    datasets: dict
        A dictionary containing the column and dataset names of
        each column/dataset combination.

    Returns
    -------
    The dataset used, the dependent variable, the independent variables and
    the fitted Pooled OLS model (``None`` when the dataset is empty).
    """

    regression_df, dependent_variable, independent_variables = get_regression_data(
        regression_variables, data, datasets, "POLS"
    )

    # Nothing to estimate: hand back the (empty) inputs with no model.
    if regression_df.empty:
        return regression_df, dependent_variable, independent_variables, None

    # Record warnings raised during estimation so they can be shown after
    # the model output instead of being emitted as they occur.
    with warnings.catch_warnings(record=True) as caught:
        regressors = add_constant(regression_df[independent_variables])
        target = regression_df[dependent_variable]
        model = PooledOLS(target, regressors).fit()
        console.print(model)

        if caught:
            console.print("Warnings:")
            for entry in caught:
                # Flatten multi-line warning text onto a single red line.
                text = f"[red]{entry.message}[/red]".replace("\n", "")
                console.print(text)

    return regression_df, dependent_variable, independent_variables, model
Exemplo n.º 4
0
# Stack the per-ticker estimate tables into one long panel, tagging every row
# with its ticker. The frames are collected first and concatenated once:
# DataFrame.append was removed in pandas 2.0 and re-copied the whole panel on
# every iteration.
tickerFrames = []

for ticker in tickers:

    tickerData = pd.DataFrame(estimates[ticker])
    tickerData['ticker'] = ticker

    tickerFrames.append(tickerData)

# pd.concat rejects an empty list, so fall back to an empty frame (what the
# original append loop produced when there were no tickers).
panel = pd.concat(tickerFrames) if tickerFrames else pd.DataFrame()

# Move the per-ticker index into an ordinary column.
# NOTE(review): presumably that index is named 'month' -- confirm upstream.
panel = panel.reset_index()

# Correlation matrix of every column except the two identifier columns.
corrVars = [x for x in panel.columns if x not in ['month', 'ticker']]
panel[corrVars].corr()
''' run a pooled regression of each of the transaction cost estimates against the three explanatory variables  '''

# NOTE: the task text above says three explanatory variables, but four are
# used in `exog` below.

regResult = dict()

dependent = ['rollEstimate', 'abdiEstimate', 'amihudRatio']
exog = ['avgVolume', 'avgMarketCap', 'dailyVol', 'invPriceAvg']

# PooledOLS expects an (entity, time) MultiIndex.
panel = panel.set_index(['ticker', 'month'])

# One pooled regression per dependent variable; no constant is added.
for y in dependent:
    mod = PooledOLS(panel[y], panel[exog])
    res = mod.fit()
    regResult[y] = res

###
Exemplo n.º 5
0
#%% imports and read data
import pandas as pd
from linearmodels import PooledOLS
import statsmodels.api as sm

# Load only the columns needed; stateid/year become the (entity, time)
# MultiIndex that PooledOLS works with.
gunsraw = pd.read_csv(
    "/Users/oliverwidder/PycharmProjects/ma/data/guns.csv",
    usecols=["stateid", "year", "avginc", "vio"],
    index_col=["stateid", "year"],
)

# Friendlier column names.
# NOTE(review): stateid is an index level at this point, so its entry in the
# mapping presumably has no effect on the columns -- confirm.
guns = gunsraw.rename(
    columns={"avginc": "income", "stateid": "state", "vio": "violent"})

#%% transform data
# Expose the year index level as a categorical column as well.
years = guns.index.get_level_values("year").to_list()
guns["year"] = pd.Categorical(years)

#%% pooled OLS

# Violent crime regressed on income plus a constant, with standard errors
# clustered by entity.
endog = guns['violent']
exog = sm.add_constant(guns['income'])
mod = PooledOLS(endog, exog)
pooledOLS_res = mod.fit(cov_type='clustered', cluster_entity=True)

print(pooledOLS_res)
Exemplo n.º 6
0
#%% import and read
import pandas as pd
from linearmodels import PooledOLS
import statsmodels.api as sm

# Load the quarterly iMaPP credit/GDP panel from Stata format.
iMapp = pd.read_stata(
    "/Users/oliverwidder/PycharmProjects/ma/data/iMapp_Q_credit_gdp.dta")

# Human-readable "<Year>-<Quarter>" label, built before the source columns
# are dropped.
iMapp["Year-Q"] = iMapp["Year"].astype(str) + "-" + iMapp["Quarter"].astype(str)

# (entity, time) MultiIndex, then discard the columns no longer needed.
iMapp = iMapp.set_index(["iso3", "dateq"])
iMapp = iMapp.drop(columns=["index", "Country", "Year", "Quarter"])

#%% AE / EMDE

# Split the sample on the AE and EMDE indicator columns.
iMapp_AE = iMapp.loc[iMapp["AE"] == 1]
iMapp_EMDE = iMapp.loc[iMapp["EMDE"] == 1]

#%% pooled OLS

# Column slice kept from the original script; not used by the regression below.
exog_cols = iMapp.columns[2:-6]

# AE subsample: credit growth on four regressors plus a constant, with
# standard errors clustered by entity.
endog_AE = iMapp_AE["credit_yoy"]
exog_AE = sm.add_constant(
    iMapp_AE[["SUM_17", "Conservation", "CCB", "LTV_Qmean"]])
mod_AE = PooledOLS(endog_AE, exog_AE)
pooledOLS_AE_res = mod_AE.fit(cov_type='clustered', cluster_entity=True)
print(pooledOLS_AE_res)
Exemplo n.º 7
0
def fm_summary(p):
    """Summarise each column of *p* with its mean and a two-sided t-test.

    For every column described by ``p.describe()`` this reports the mean,
    the standard error of the mean (std / sqrt(count)), the resulting
    t-statistic and its two-sided p-value from a Student t distribution
    with ``count - 1`` degrees of freedom.

    Returns a DataFrame indexed by the columns of *p* with the columns
    'mean', 'std_error', 'tstat' and 'pval'.
    """
    summary = p.describe().T
    n_obs = summary['count']

    summary['std_error'] = summary['std'] / np.sqrt(n_obs)
    summary['tstat'] = summary['mean'] / summary['std_error']

    # Survival function gives one tail; double it for the two-sided test.
    one_tail = stats.t.sf(np.abs(summary['tstat']), n_obs - 1)
    summary['pval'] = one_tail * 2

    return summary[['mean', 'std_error', 'tstat', 'pval']]


# Baseline specification: apply `ols_coef` to each date group, then summarise
# the per-date results with fm_summary.
# NOTE(review): ols_coef is defined elsewhere; presumably it returns the
# cross-sectional OLS coefficients for the given formula -- confirm.
gamma = data1.groupby('date').apply(
    ols_coef,
    'ret ~ 1 + dummy + mc + prc + bm + div + ret23 + ret46 + ret712',
)
res = fm_summary(gamma)
# Round p-values to three decimals for display.
res['pval'] = [round(value, 3) for value in res['pval'].values.tolist()]

# Extended specification: the single dummy is replaced by the individual
# category variables.
gamma = data1.groupby('date').apply(
    ols_coef,
    'ret ~ 1 +Association+Sanctions+Financial+Corruption+Information+Human+Workplace+Production_Supply+Environmental+Management+Workforce+Regulatory+Fraud+Anti_Competitive+Ownership+Product_Service+Discrimination_Workforce+mc + prc + bm + div + ret23 + ret46 + ret712',
)
res = fm_summary(gamma)
res['pval'] = [round(value, 3) for value in res['pval'].values.tolist()]
res
'''pooled OLS with double-clustered standard error'''
# Parse the date strings and build the (entity, time) MultiIndex.
data1['date'] = data1['date'].apply(
    lambda text: datetime.strptime(text, '%Y-%m-%d'))
data1 = data1.set_index(['tickers', 'date'])
# Explicit intercept column.
data1['const'] = 1
regressor_cols = [
    'dummy', 'mc', 'prc', 'bm', 'div', 'ret23', 'ret46', 'ret712', 'const'
]
mod = PooledOLS(data1['ret'], data1[regressor_cols])
# Cluster on both entity and time.
res = mod.fit(cov_type='clustered', cluster_entity=True, cluster_time=True)
Exemplo n.º 8
0
    # Body of an enclosing loop whose header is not visible in this chunk.
    # NOTE(review): `t`, `panel` and `resReg` are presumably set up by that
    # outer loop / preceding code -- confirm.
    for ticker in tickers:

        # Positional rows 14 up to (but excluding) 14 + 60 + t: the end of the
        # slice moves with t, so the estimation window grows.
        tickerData = pd.DataFrame(estimates[ticker]).iloc[14:(14 + 60 + t), ]
        tickerData['ticker'] = ticker  # tag rows with their ticker

        # NOTE(review): DataFrame.append was removed in pandas 2.0; on current
        # pandas this needs pd.concat instead.
        panel = panel.append(tickerData)

    # Dependent variable and the five explanatory variables of the regression.
    dependent = ['rt']
    exog = [
        '12mthMomentum', 'dividendYield', 'monthlyVolatility', 'shortRates',
        'slopeTermstructure'
    ]

    # Two-level index: ticker first, then the existing row index.
    panel = panel.set_index(['ticker', panel.index])

    # Pooled regression over the current window (no constant is added);
    # keep only the coefficient estimates, keyed by 74 + t.
    mod = PooledOLS(panel[dependent], panel[exog])
    resReg[74 + t] = mod.fit().params

# One row of coefficients per key of resReg, one column per regressor.
resReg1 = pd.DataFrame(resReg).transpose()

# Relabel the rows with consecutive integers starting at 74.
resReg1.index = np.arange(74, len(resReg1) + 74)

# Forecast returns from the panel regression coefficients: for each ticker,
# sum over the factors of (factor value * coefficient).
df_rt_pred = pd.DataFrame()

factorNames = [
    '12mthMomentum', 'dividendYield', 'monthlyVolatility', 'shortRates',
    'slopeTermstructure'
]

for ticker in tickers:
    # `.ix` was removed in pandas 1.0. resReg1 has an integer index, for which
    # `.ix` was label-based, so `.loc` is the exact replacement (label 430 is
    # included). The products align on index labels before being summed.
    df_rt_pred[ticker] = sum(
        estimates[ticker][factor][74:430] * resReg1[factor].loc[74:430]
        for factor in factorNames
    )