def _fit_start_params_hr(self, order):
    """
    Get starting parameters for fit.

    Parameters
    ----------
    order : iterable
        (p,q,k) - AR lags, MA lags, and number of exogenous variables
        including the constant.

    Returns
    -------
    start_params : array
        A first guess at the starting parameters.

    Notes
    -----
    If necessary, fits an AR process with the lag length selected
    according to best BIC.  Obtain the residuals.  Then fit an ARMA(p,q)
    model via OLS using these residuals for a first approximation.
    Uses a separate OLS regression to find the coefficients of exogenous
    variables.

    References
    ----------
    Hannan, E.J. and Rissanen, J.  1982.  "Recursive estimation of mixed
    autoregressive-moving average order."  `Biometrika`.  69.1.
    """
    p, q, k = order
    start_params = zeros((p + q + k))
    endog = self.endog.copy()  # copy because overwritten
    exog = self.exog
    if k != 0:
        ols_params = GLS(endog, exog).fit().params
        start_params[:k] = ols_params
        endog -= np.dot(exog, ols_params).squeeze()
    if q != 0:
        if p != 0:
            armod = AR(endog).fit(ic='bic', trend='nc')
            arcoefs_tmp = armod.params
            p_tmp = armod.k_ar
            resid = endog[p_tmp:] - np.dot(
                lagmat(endog, p_tmp, trim='both'), arcoefs_tmp)
            if p < p_tmp + q:
                endog_start = p_tmp + q - p
                resid_start = 0
            else:
                endog_start = 0
                resid_start = p - p_tmp - q
            lag_endog = lagmat(endog, p, 'both')[endog_start:]
            lag_resid = lagmat(resid, q, 'both')[resid_start:]
            # stack ar lags and resids
            X = np.column_stack((lag_endog, lag_resid))
            coefs = GLS(endog[max(p_tmp + q, p):], X).fit().params
            start_params[k:k + p + q] = coefs
        else:
            start_params[k + p:k + p + q] = yule_walker(endog, order=q)[0]
    if q == 0 and p != 0:
        arcoefs = yule_walker(endog, order=p)[0]
        start_params[k:k + p] = arcoefs
    return start_params
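# The sketch below is a stand-alone illustration of the Hannan-Rissanen
# recursion described in the docstring above, written against plain NumPy.
# The helper names (_lagmat, hr_start_params) and the ARMA(1,1) simulation
# are assumptions made for this example; they are not part of the class above.
import numpy as np


def _lagmat(x, maxlag):
    """Return the matrix whose columns are x lagged 1..maxlag, rows trimmed."""
    n = len(x)
    return np.column_stack([x[maxlag - i - 1:n - i - 1] for i in range(maxlag)])


def hr_start_params(y, p, q, p_long=10):
    """Hannan-Rissanen starting values for ARMA(p, q) on a demeaned series y.

    Assumes p >= 1, q >= 1 and p_long + q >= p.
    """
    y = np.asarray(y, dtype=float)
    # Step 1: long autoregression by least squares; its residuals proxy the
    # unobserved innovations.
    X_long = _lagmat(y, p_long)                      # rows are t = p_long..n-1
    ar_long = np.linalg.lstsq(X_long, y[p_long:], rcond=None)[0]
    e = y[p_long:] - X_long @ ar_long                # e[s] ~ innovation at t = p_long + s
    # Step 2: regress y_t on its own lags and on lagged innovation proxies.
    t0 = p_long + q                                  # first t with all regressors available
    X = np.column_stack([_lagmat(y, p)[t0 - p:],     # y_{t-1}, ..., y_{t-p}
                         _lagmat(e, q)])             # e_{t-1}, ..., e_{t-q}
    coefs = np.linalg.lstsq(X, y[t0:], rcond=None)[0]
    return coefs                                     # first p entries AR, last q MA


# quick check on a simulated ARMA(1,1); estimates should be near (0.6, 0.3)
rng = np.random.default_rng(0)
eps = rng.standard_normal(500)
y = np.zeros(500)
for t in range(1, 500):
    y[t] = 0.6 * y[t - 1] + eps[t] + 0.3 * eps[t - 1]
print(hr_start_params(y, p=1, q=1))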
def fit(self, lambd=1.):
    #this does duplicate transformation, but I need resid not wresid
    res_gls = GLS(self.endog, self.exog, sigma=self.sigma).fit()
    self.res_gls = res_gls
    sigma2_e = res_gls.mse_resid

    r_matrix = self.r_matrix
    q_matrix = self.q_matrix
    sigma_prior_inv = self.sigma_prior_inv
    x = self.wexog
    y = self.wendog[:, None]
    #why are sigma2_e * lambd multiplied, not ratio?
    #larger lambd -> stronger prior (it's not the variance)
    #print 'lambd inside fit', lambd
    xpx = np.dot(x.T, x) + \
          sigma2_e * lambd * np.dot(r_matrix.T,
                                    np.dot(sigma_prior_inv, r_matrix))
    xpy = np.dot(x.T, y) + \
          sigma2_e * lambd * np.dot(r_matrix.T,
                                    np.dot(sigma_prior_inv, q_matrix))
    #xpy = xpy[:, None]
    xpxi = np.linalg.pinv(xpx)
    params = np.dot(xpxi, xpy)   # or solve
    params = np.squeeze(params)
    self.normalized_cov_params = xpxi   # why attach it to self, i.e. model?

    lfit = TheilRegressionResults(self, params,
                                  normalized_cov_params=xpxi)
    lfit.penalization_factor = lambd
    return lfit
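# A minimal stand-alone sketch of the penalized normal equations formed above,
# using the same symbol names (x, y, r_matrix, q_matrix, sigma_prior_inv,
# sigma2_e, lambd) but plain NumPy arrays.  The ridge-style prior R = I, q = 0
# and the simulated data are assumptions for illustration only.
import numpy as np

rng = np.random.default_rng(0)
n, k = 50, 3
x = rng.standard_normal((n, k))
y = x @ np.array([1.0, 0.5, -0.2]) + 0.1 * rng.standard_normal(n)

r_matrix = np.eye(k)              # stochastic restriction R beta ~ q
q_matrix = np.zeros(k)
sigma_prior_inv = np.eye(k)       # inverse prior covariance of R beta
sigma2_e = 0.1 ** 2               # would come from the preliminary GLS fit
lambd = 1.0                       # larger lambd -> stronger pull toward q

penalty = sigma2_e * lambd * r_matrix.T @ sigma_prior_inv @ r_matrix
xpx = x.T @ x + penalty
xpy = x.T @ y + sigma2_e * lambd * r_matrix.T @ sigma_prior_inv @ q_matrix
params = np.linalg.solve(xpx, xpy)   # shrinks the OLS estimate toward q
print(params)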
def setupClass(cls):
    data = longley.load()
    data.exog = add_constant(data.exog)
    ols_res = OLS(data.endog, data.exog).fit()
    gls_res = GLS(data.endog, data.exog).fit()
    cls.res1 = gls_res
    cls.res2 = ols_res
def fit(self):
    """
    Fit each equation separately by GLS on the whitened design and
    return the list of parameter estimates, one array per equation.
    """
    delta = []
    wexog = self.wexog
    endog = self.endog
    for j in range(self._M):
        delta.append(GLS(endog[j], wexog[j]).fit().params)
    return delta
def __init__(self, sys, sigma=None, dfk=None):
    if len(sys) % 2 != 0:
        raise ValueError("sys must be a list of pairs of endogenous and "
                         "exogenous variables.  Got length %s" % len(sys))
    if dfk:
        if not dfk.lower() in ['dfk1', 'dfk2']:
            raise ValueError("dfk option %s not understood" % (dfk))
        self._dfk = dfk
    M = len(sys[1::2])
    self._M = M
#    exog = np.zeros((M,M), dtype=object)
#    for i,eq in enumerate(sys[1::2]):
#        exog[i,i] = np.asarray(eq)  # not sure this exog is needed
#                                    # used to compute resids for now
    exog = np.column_stack([np.asarray(sys[1::2][i]) for i in range(M)])
#    exog = np.vstack(np.asarray(sys[1::2][i]) for i in range(M))
    self.exog = exog   # 2d ndarray exog is better
    # Endog, might just go ahead and reshape this?
    endog = np.asarray(sys[::2])
    self.endog = endog
    self.nobs = float(self.endog[0].shape[0])  # assumes all the same length

    # Degrees of Freedom
    df_resid = []
    df_model = []
    for eq_exog in sys[1::2]:
        df_resid.append(self.nobs - tools.rank(eq_exog))
        df_model.append(tools.rank(eq_exog) - 1)
    self.df_resid = np.asarray(df_resid)
    self.df_model = np.asarray(df_model)

    # "Block-diagonal" sparse matrix of exog, built as a linked-list matrix
    sp_exog = sparse.lil_matrix((int(self.nobs * M),
                                 int(np.sum(self.df_model + 1))))
    self._cols = np.cumsum(np.hstack((0, self.df_model + 1)))
    nobs = int(self.nobs)
    for i in range(M):
        sp_exog[i * nobs:(i + 1) * nobs,
                self._cols[i]:self._cols[i + 1]] = sys[1::2][i]
    self.sp_exog = sp_exog.tocsr()   # cast to compressed for efficiency

    # Deal with sigma, check shape earlier if given
    if np.any(sigma):
        sigma = np.asarray(sigma)   # check shape
    elif sigma is None:
        resids = []
        for i in range(M):
            resids.append(GLS(endog[i],
                              exog[:, self._cols[i]:self._cols[i + 1]]
                              ).fit().resid)
        resids = np.asarray(resids).reshape(M, -1)
        sigma = self._compute_sigma(resids)
    self.sigma = sigma
    self.cholsigmainv = np.linalg.cholesky(np.linalg.pinv(self.sigma)).T
    self.initialize()
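# An illustrative, stand-alone sketch of the two main pieces the constructor
# above assembles: the block-diagonal stacked design (sp_exog) and the
# cross-equation residual covariance estimated from equation-by-equation
# least-squares fits.  The two toy equations below are assumptions made for
# this example.
import numpy as np
from scipy import sparse

rng = np.random.default_rng(0)
nobs = 20
X1 = np.column_stack([np.ones(nobs), rng.standard_normal(nobs)])
X2 = np.column_stack([np.ones(nobs), rng.standard_normal((nobs, 2))])
y1 = X1 @ np.array([1.0, 0.5]) + rng.standard_normal(nobs)
y2 = X2 @ np.array([0.2, -1.0, 0.3]) + rng.standard_normal(nobs)

# block-diagonal design, one block per equation
sp_exog = sparse.block_diag([X1, X2]).tocsr()

# single-equation residuals, then sigma = E E' / nobs  (M x M)
resids = []
for y, X in [(y1, X1), (y2, X2)]:
    beta = np.linalg.lstsq(X, y, rcond=None)[0]
    resids.append(y - X @ beta)
resids = np.asarray(resids)
sigma = resids @ resids.T / nobs
print(sp_exog.shape, sigma)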
def _fit_btwn(self, method, effects):
    # group mean regression or WLS
    if effects != "twoway":
        endog = self._group_mean(self.endog, index=effects)
        exog = self._group_mean(self.exog, index=effects)
    else:
        raise ValueError("%s effects is not valid for the between "
                         "estimator" % effects)
    befit = GLS(endog, exog).fit()
    return befit
def setupClass(cls):
    from results.results_regression import LongleyGls

    data = longley.load()
    exog = add_constant(np.column_stack(
        (data.exog[:, 1], data.exog[:, 4])))
    tmp_results = OLS(data.endog, exog).fit()
    rho = np.corrcoef(tmp_results.resid[1:],
                      tmp_results.resid[:-1])[0][1]  # by assumption
    order = toeplitz(np.arange(16))
    sigma = rho**order
    GLS_results = GLS(data.endog, exog, sigma=sigma).fit()
    cls.res1 = GLS_results
    cls.res2 = LongleyGls()
def fit(self, lambd=1.):
    #maybe iterate
    #preliminary estimate
    res_gls = GLS(self.endog, self.exog, sigma=self.sigma).fit()
    res_resid = OLS(res_gls.resid**2, self.exog_var).fit()
    #or log-link
    #res_resid = OLS(np.log(res_gls.resid**2), self.exog_var).fit()
    #here I could use whiten and current instance instead of delegating,
    #but this is easier
    #see pattern of GLSAR, calls self.initialize and self.fit
    res_wls = WLS(self.endog, self.exog,
                  weights=1. / res_resid.fittedvalues).fit()

    res_wls._results.results_residual_regression = res_resid
    return res_wls
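# A plain-NumPy sketch of the two-step feasible WLS above: an initial fit, a
# regression of the squared residuals on the variance regressors (exog_var),
# and a weighted refit with weights 1 / fitted variance.  The simulated data,
# the variance specification, and the clipping guard are assumptions for
# illustration.
import numpy as np

rng = np.random.default_rng(0)
n = 200
x = rng.uniform(1, 5, size=n)
exog = np.column_stack([np.ones(n), x])
exog_var = np.column_stack([np.ones(n), x ** 2])     # regressors for the variance
y = exog @ np.array([1.0, 2.0]) + rng.standard_normal(n) * x   # sd grows with x

beta_ols = np.linalg.lstsq(exog, y, rcond=None)[0]
resid = y - exog @ beta_ols

gamma = np.linalg.lstsq(exog_var, resid ** 2, rcond=None)[0]
var_hat = exog_var @ gamma                           # fitted conditional variance
var_hat = np.clip(var_hat, 1e-8, None)               # guard against nonpositive fits
w = 1.0 / var_hat

# WLS is OLS on sqrt(w)-scaled data
sw = np.sqrt(w)
beta_wls = np.linalg.lstsq(exog * sw[:, None], y * sw, rcond=None)[0]
print(beta_ols, beta_wls)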
def fit(self, model=None, method=None, effects='oneway'):
    """
    method : LSDV, demeaned, MLE, GLS, BE, FE, optional
    model : between, fixed, random, pooled, [gmm]
    effects : oneway, time, twoway
    femethod : demeaned (only one implemented), WLS
    remethod : swar, amemiya, nerlove, walhus

    Notes
    -----
    This is unfinished.  None of the method arguments work yet.
    Only oneway effects should work.
    """
    if method:   # get rid of this with default
        method = method.lower()
    model = model.lower()
    if method and method not in ["lsdv", "demeaned", "mle", "gls", "be",
                                 "fe"]:
        # get rid of if method with default
        raise ValueError("%s not a valid method" % method)
#    if method == "lsdv":
#        self.fit_lsdv(model)
    if model == 'pooled':
        return GLS(self.endog, self.exog).fit()
    if model == 'between':
        return self._fit_btwn(method, effects)
    if model == 'fixed':
        return self._fit_fixed(method, effects)
def whiten(self, Y):
    """
    Runs the first stage of the 2SLS.

    Returns the RHS variables that include the instruments.
    """
    wexog = []
    indep_endog = self._indep_endog   # this has the col mapping
    # fullexog = self.fullexog
    instruments = self.instruments
    for eq in range(self._M):   # need to go through all equations regardless
        instr_eq = Y.get(eq, None)   # Y has the eq to ind endog array map
        newRHS = self.exog[eq].copy()
        if instr_eq:
            for i, LHS in enumerate(instr_eq):
                yhat = GLS(LHS, self.instruments).fit().fittedvalues
                newRHS[:, indep_endog[eq][i]] = yhat
            # this might fail if there is a one variable column (nobs,)
            # in exog
        wexog.append(newRHS)
    return wexog
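# A stand-alone sketch of the first-stage replacement performed above: each
# right-hand-side endogenous column is swapped for its fitted values from a
# regression on the full instrument set.  The simulated arrays and the
# endog_cols mapping are assumptions for illustration.
import numpy as np

rng = np.random.default_rng(0)
nobs = 100
instruments = np.column_stack([np.ones(nobs), rng.standard_normal((nobs, 2))])
z = instruments @ np.array([0.5, 1.0, -1.0]) + rng.standard_normal(nobs)
exog_eq = np.column_stack([np.ones(nobs), z])   # RHS of one equation
endog_cols = [1]                                # which RHS columns are endogenous

new_rhs = exog_eq.copy()
for col in endog_cols:
    gamma = np.linalg.lstsq(instruments, exog_eq[:, col], rcond=None)[0]
    new_rhs[:, col] = instruments @ gamma       # first-stage fitted values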
def _fit_fixed(self, method, effects):
    endog = self.endog
    exog = self.exog
    demeantwice = False
    if effects in ["oneway", "twoways"]:
        if effects == "twoways":
            demeantwice = True
            effects = "oneway"
        endog_mean, counts = self._group_mean(endog, index=effects,
                                              counts=True)
        exog_mean = self._group_mean(exog, index=effects)
        counts = counts.astype(int)
        endog = endog - np.repeat(endog_mean, counts)
        exog = exog - np.repeat(exog_mean, counts, axis=0)
    if demeantwice or effects == "time":
        endog_mean, dummies = self._group_mean(endog, index="time",
                                               dummies=True)
        exog_mean = self._group_mean(exog, index="time")
        # This allows unbalanced panels
        endog = endog - np.dot(endog_mean, dummies)
        exog = exog - np.dot(dummies.T, exog_mean)
    fefit = GLS(endog, exog[:, -self._cons_index]).fit()
    #TODO: might fail with one regressor
    return fefit
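# A compact sketch of the within transformation behind the fixed-effects fit
# above: subtract each group's mean from endog and exog, then regress the
# demeaned data without a constant.  Group labels and data are simulated for
# illustration only; demean_by_group stands in for the class's _group_mean
# machinery.
import numpy as np

rng = np.random.default_rng(0)
groups = np.repeat(np.arange(10), 5)             # 10 units, 5 periods each
x = rng.standard_normal(groups.size)
alpha = rng.standard_normal(10)[groups]          # unit fixed effects
y = 2.0 * x + alpha + 0.1 * rng.standard_normal(groups.size)


def demean_by_group(a, groups):
    """Subtract the group mean from each observation (1d or 2d a)."""
    a = np.asarray(a, dtype=float)
    out = a.copy()
    for g in np.unique(groups):
        mask = groups == g
        out[mask] = a[mask] - a[mask].mean(axis=0)
    return out


y_w = demean_by_group(y, groups)
x_w = demean_by_group(x, groups)[:, None]
beta_fe = np.linalg.lstsq(x_w, y_w, rcond=None)[0]
print(beta_fe)    # should be close to 2.0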
def setupClass(cls):
    from gwstatsmodels.datasets.ccard import load
    data = load()
    cls.res1 = WLS(data.endog, data.exog,
                   weights=1 / data.exog[:, 2]).fit()
    cls.res2 = GLS(data.endog, data.exog, sigma=data.exog[:, 2]).fit()