def __init__(self, frame, share_column='vote_share', group_by='state',
             covariate_columns=None, weight_column=None, year_column='year',
             redistrict_column=None, district_id='district_id',
             missing='drop', uncontested=None, break_on_GIGO=True):
    super().__init__(frame, share_column=share_column,
                     covariates=covariate_columns,
                     weight_column=weight_column,
                     year_column=year_column,
                     redistrict_column=redistrict_column,
                     district_id=district_id,
                     missing=missing,
                     uncontested=uncontested,
                     break_on_GIGO=break_on_GIGO)
    self._years = np.sort(self.long.year.unique())
    self._covariate_cols += ['grouped_vs']
    self._decade_starts = np.sort(
        list(set(_year_to_decade(yr) for yr in self.years)))
    self.decades = {dec: [] for dec in self._decade_starts}
    for i, (yr, wide) in enumerate(zip(self.years, self.wide)):
        if group_by is not None:
            # Mean vote share within each group (e.g. state); fill all-NaN
            # groups with .5 so fully uncontested groups stay usable.
            grouped_vs = wide.groupby(group_by).vote_share.mean().to_frame()
            grouped_vs.columns = ['grouped_vs']
            grouped_vs = grouped_vs.fillna(.5)
            self.wide[i] = wide.merge(grouped_vs, left_on=group_by,
                                      right_index=True)
            self.wide[i]['grouped_vs'] = self.wide[i]['grouped_vs'].fillna(.5)
        else:
            grouped_vs = wide.vote_share.mean()
            self.wide[i]['grouped_vs'] = grouped_vs
        self.decades[_year_to_decade(yr)].append(self.wide[i])
    self.models = []
    for yr in self._decade_starts:
        self.decades[yr] = pd.concat(self.decades[yr], axis=0, sort=True)
        # WLS yields incredibly precise simulation values? Not sure why.
        X = sm.add_constant(self.decades[yr][self._covariate_cols]).values
        Y = self.decades[yr].vote_share.values
        # Impute missing vote shares with the group mean.
        Y[np.isnan(Y)] = self.decades[yr]['grouped_vs'].values[np.isnan(Y)]
        if weight_column is None:
            self.models.append(sm.GLS(Y, X).fit())
        else:
            weights = self.decades[yr].weight
            self.models.append(sm.GLS(Y, X, sigma=weights).fit())
def compare_models(y, smaller_model_list, bigger_model_list):
    # Fit the nested (null) and the larger (alternative) model once each
    # and reuse the fits (the original refit each model twice).
    model_null = sm.GLS(y, smaller_model_list)
    model_alternative = sm.GLS(y, bigger_model_list)
    fit_null = model_null.fit()
    fit_alternative = model_alternative.fit()
    null = model_null.loglike(fit_null.params)
    alternative = model_alternative.loglike(fit_alternative.params)
    info_small_model = fit_null.summary()
    info_big_model = fit_alternative.summary()
    rejected = llr_test(null, alternative)
    return rejected, info_small_model, info_big_model
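# `llr_test` is referenced above but not defined in this snippet. A minimal
# sketch of a likelihood-ratio test, assuming one restriction between the
# nested models and a 5% level (both assumptions, adjust as needed):
from scipy import stats

def llr_test(loglike_null, loglike_alternative, df=1, alpha=0.05):
    """Reject H0 if 2*(llf_alt - llf_null) exceeds the chi2 threshold."""
    statistic = 2.0 * (loglike_alternative - loglike_null)
    p_value = stats.chi2.sf(statistic, df)
    return p_value < alpha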
def best_model_in_the_class(dict_data, combinations, y):
    # NOTE: the original snippet referenced an undefined `y`; it is assumed
    # here to be the dependent variable and is passed in explicitly.
    helper_dict = {}
    combinations_dict = {i: combinations[i] for i in range(len(combinations))}
    r_squared = {i: [] for i in range(len(combinations))}
    # Build the regressor matrix for each candidate combination and fit GLS.
    for key in combinations_dict:
        for i in range(len(dict_data['Intel'])):
            helper_dict[i] = [v[i] for k, v in dict_data.items()
                              if k in combinations_dict[key]]
        helper_list = [value for key_, value in sorted(helper_dict.items())]
        model = sm.GLS(y, helper_list)
        regression = model.fit()
        r_squared[key].append(regression.rsquared)
    # Map each R^2 onto the combination that produced it.
    determination_combination_dict = {
        float(r_squared[k][0]): list(combinations_dict[k])
        for k in combinations_dict
    }
    # Choose the best model in the class: the one with the highest R^2.
    best_r = max(determination_combination_dict.keys())
    best_model = determination_combination_dict[best_r]
    return best_model, determination_combination_dict, r_squared, helper_dict, helper_list
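# A minimal usage sketch for the function above. `dict_data` is assumed to
# map ticker names to equal-length series (the original indexes off 'Intel',
# so that key must exist); combinations are candidate regressor subsets.
# All numbers are made up.
import itertools
import numpy as np
import statsmodels.api as sm

dict_data = {
    'Intel': [1.0, 1.2, 0.9, 1.1],
    'AMD':   [0.8, 1.0, 1.1, 0.7],
    'NVDA':  [1.5, 1.4, 1.6, 1.3],
}
y = np.array([1.1, 1.3, 1.2, 1.0])  # dependent variable (assumed)
combinations = [list(c) for c in itertools.combinations(dict_data, 2)]
best_model, *_ = best_model_in_the_class(dict_data, combinations, y)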
def vcfassoc(formula, covariate_df, groups=None):
    y, X = patsy.dmatrices(str(formula), covariate_df, return_type='dataframe')
    # get the column containing genotype
    ix = get_genotype_ix(X)

    Binomial = sm.families.Binomial
    logit = sm.families.links.Logit()
    if groups is not None:
        #covariate_df['grps'] = map(str, range(len(covariate_df) / 8)) * 8
        if not isinstance(groups, (pd.DataFrame, np.ndarray)):
            cov = Exchangeable()
            model = sm.GEE(y, X, groups=covariate_df[groups],
                           cov_struct=cov, family=Binomial())
        else:
            # .ix was removed from pandas; .loc assumes label-based indexing.
            model = sm.GLS(logit(y), X, sigma=groups.loc[X.index, X.index])
    else:
        model = sm.GLM(y, X, missing='drop', family=Binomial())

    result = model.fit(maxiter=1000)
    res = {
        'OR': np.exp(result.params[ix]),
        'pvalue': result.pvalues[ix],
        'z': result.tvalues[ix],
        # .loc assumes get_genotype_ix returns a label; use .iloc if it
        # returns a position.
        'OR_CI': tuple(np.exp(result.conf_int().loc[ix, :])),
    }
    try:
        res['df_resid'] = result.df_resid
    except AttributeError:
        pass
    return res
def gls(data, xseq, **params):
    """
    Fit GLS
    """
    if params['formula']:
        return gls_formula(data, xseq, **params)

    X = sm.add_constant(data['x'])
    Xseq = sm.add_constant(xseq)

    # Split method_args between the model constructor and .fit(); the
    # original passed sm.OLS here, which looks like a copy-paste slip
    # in a GLS fitter.
    init_kwargs, fit_kwargs = separate_method_kwargs(
        params['method_args'], sm.GLS, sm.GLS.fit)
    model = sm.GLS(data['y'], X, **init_kwargs)
    results = model.fit(**fit_kwargs)

    data = pd.DataFrame({'x': xseq})
    data['y'] = results.predict(Xseq)

    if params['se']:
        alpha = 1 - params['level']
        prstd, iv_l, iv_u = wls_prediction_std(results, Xseq, alpha=alpha)
        data['se'] = prstd
        data['ymin'] = iv_l
        data['ymax'] = iv_u

    return data
def simple_regression_example():
    # Load data.
    spector_data = sm.datasets.spector.load()
    spector_data.exog = sm.add_constant(spector_data.exog, prepend=False)

    # Fit and summarize each model.
    # OLS: ordinary least squares for i.i.d. errors, Sigma = I.
    mod = sm.OLS(spector_data.endog, spector_data.exog)
    res = mod.fit()
    print(res.summary())

    # GLS: generalized least squares for arbitrary covariance Sigma.
    mod = sm.GLS(spector_data.endog, spector_data.exog)
    res = mod.fit()
    print(res.summary())

    # WLS: weighted least squares for heteroskedastic errors, diag(Sigma).
    mod = sm.WLS(spector_data.endog, spector_data.exog)
    res = mod.fit()
    print(res.summary())

    # GLSAR: feasible GLS with autocorrelated AR(p) errors, Sigma = Sigma(rho).
    mod = sm.GLSAR(spector_data.endog, spector_data.exog)
    res = mod.fit()
    print(res.summary())
def test_sm_GLSAR():
    print("Testing SM, GLSAR...")
    X, y = iris_data
    # The original called sm.GLS(y, X, rho=2), but GLS has no `rho`
    # argument; an AR(2) error structure belongs to GLSAR.
    est = sm.GLSAR(y, X, rho=2)
    mod = est.fit()
    docs = {'name': "GLSAR test"}
    fv = X[0, :]
    upload(mod, fv, docs)
def simple_model(X, Y, model_type="LR"):
    # Renamed `type` (shadowed the builtin) and the local that shadowed
    # the function name; also made the error message informative.
    if model_type == "LR":
        model = sm.GLS(Y, X)
    elif model_type == "NN":
        model = NN_FF(10, X.shape[1])
    else:
        raise NotImplementedError(f"Unknown model type: {model_type!r}")
    return model
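# Usage sketch: the "LR" branch returns an unfitted statsmodels GLS model;
# NN_FF is an external network class not defined in this snippet.
import numpy as np

model = simple_model(np.random.rand(20, 3), np.random.rand(20), "LR")
fitted = model.fit()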
def gls_fit(self, X, y, add_const=True):
    # The original left X_ undefined when add_const was False.
    X_ = sm.add_constant(X) if add_const else X
    mod = sm.GLS(y.values, np.asarray(X_))
    res = mod.fit()
    # Heteroskedasticity-robust standard errors.
    res = res.get_robustcov_results()
    self.reg = res
    print(res.summary())
def pgls(responseList, predictor, phyCovMatrix, intercept=True):
    #predictor = np.array(predictor).T  # transpose to make sure sm doesn't choke
    # TODO: should I set an intercept, or no?
    # Add a column of 1s at the beginning of exog so the model has an
    # intercept. (The original built an array of (1, row) tuples, which
    # only yields a proper n x 2 design when each row is a scalar;
    # sm.add_constant handles both the 1-D and 2-D cases.)
    if intercept:
        predictor = sm.add_constant(np.asarray(predictor))
    # return the (unfitted) model
    return sm.GLS(endog=responseList, exog=predictor, sigma=phyCovMatrix)
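# A minimal usage sketch for pgls: three taxa whose shared branch lengths
# give the error covariance (all values here are made up for illustration).
import numpy as np

trait_y = [2.0, 2.3, 3.1]
trait_x = [1.0, 1.2, 2.0]
phy_cov = np.array([[1.0, 0.6, 0.1],
                    [0.6, 1.0, 0.1],
                    [0.1, 0.1, 1.0]])
fit = pgls(trait_y, trait_x, phy_cov).fit()
print(fit.params)  # intercept and slope under phylogenetic GLS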
def test_sm_GLS():
    print("Testing SM, GLS...")
    data = sm.datasets.longley.load(as_pandas=False)
    X = sm.add_constant(data.exog)
    est = sm.GLS(data.endog, X, sigma=1)
    mod = est.fit()
    docs = {'name': "GLS test"}
    fv = X[0, :]
    upload(mod, fv, docs)
def compare_models(self, smaller_model_list, bigger_model_list):
    y = self.dependent_variable[list(self.dependent_variable.keys())[0]]
    # Fit each nested model once and reuse the result (the original
    # refit each model twice).
    model_null = sm.GLS(y, smaller_model_list)
    model_alternative = sm.GLS(y, bigger_model_list)
    fit_null = model_null.fit()
    fit_alternative = model_alternative.fit()
    null = model_null.loglike(fit_null.params)
    alternative = model_alternative.loglike(fit_alternative.params)
    self.info_small_model = fit_null.summary()
    self.info_big_model = fit_alternative.summary()
    self.rejected = self.llr_test(null, alternative)
    return self.rejected, self.info_small_model, self.info_big_model
def linear_regression(self):
    """
    Notes
    -----
    Only the following combinations make sense for family and link::

                     ident log logit probit cloglog pow opow nbinom loglog logc
        Gaussian       x    x                        x
        inv Gaussian   x    x                        x
        binomial       x    x    x     x       x     x    x           x     x
        Poisson        x    x                        x
        neg binomial   x    x                        x          x
        gamma          x    x                        x

    Examples
    --------
    >>> from cost_functions import *
    >>> imp = Import('./', 'nyc-condominium-dataset.csv')
    >>> df = imp.import_housing_data()
    >>> x_var = ['comp_full_market_value', 'comp2_full_market_value', 'gross_sqft']
    >>> reg = Regression(df, 'full_market_value', x_var, 2.5, 2.5)
    >>> df_final = reg.linear_regression()
    >>> cf = CostFunction(df_final, 'full_market_value', 'predicted', 'district', 10, 2.5, 2.5)
    >>> cf.all_glm()
    """
    df = self.df
    y = df[self.y]

    # Outlier bounds: the configured number of standard deviations
    # around the mean.
    ol = y.mean() - self.std_ol * y.std()
    oh = y.mean() + self.std_oh * y.std()

    # Remove missing values and outliers in one pass.
    df = df[(df['comp2_full_market_value'] > 0)
            & (df['comp_full_market_value'] > 0)
            & (df['full_market_value'] > 0)
            & (df[self.y] > ol) & (df[self.y] < oh)]
    df = df.reset_index(drop=True)

    y = df[self.y]
    x = sm.add_constant(df[self.x])

    # Note: GLS returns the same results as GLM with const=1 and
    # family=sm.families.Gaussian().
    m2 = sm.GLS(y, x)
    r2 = m2.fit()
    df_predicted = pd.DataFrame(r2.predict(), columns=['predicted'])
    df_final = df.combine_first(df_predicted)
    print(r2.summary())
    return df_final
def refresh(self, X, Y):
    X, Y = self._prepare_data_for_fit(X, Y)
    ols_resid = sm.OLS(Y, X).fit().resid  # OLS residuals
    # Regress residuals on their lags to estimate the serial correlation.
    res_fit = sm.OLS(ols_resid[1:], ols_resid[:-1]).fit()
    rho = res_fit.params  # autoregressive parameter
    order = toeplitz(np.arange(len(ols_resid)))  # |i - j| lag matrix
    sigma = rho ** order  # AR(1) error covariance structure
    self._model = sm.GLS(Y, X, sigma=sigma).fit()
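# A self-contained sketch of the same feasible-GLS pattern on synthetic
# AR(1) noise, to make the rho**order construction concrete (all data
# here is made up).
import numpy as np
import statsmodels.api as sm
from scipy.linalg import toeplitz

rng = np.random.default_rng(0)
n = 200
x = sm.add_constant(rng.normal(size=n))
e = np.zeros(n)
for t in range(1, n):  # AR(1) errors with rho = 0.6
    e[t] = 0.6 * e[t - 1] + rng.normal()
y = x @ np.array([1.0, 2.0]) + e

resid = sm.OLS(y, x).fit().resid
rho = sm.OLS(resid[1:], resid[:-1]).fit().params[0]
sigma = rho ** toeplitz(np.arange(n))  # Cov(e_i, e_j) proportional to rho**|i-j|
print(sm.GLS(y, x, sigma=sigma).fit().params)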
def regr_gls_sm(y: Union[np.ndarray, pd.DataFrame],
                x: Union[np.ndarray, pd.DataFrame],
                **param):
    '''
    Fit GLS and GLSAR models, estimating the AR(1) error structure from
    the OLS residuals. Returns the two fitted results.
    '''
    if isinstance(x, (np.ndarray, pd.DataFrame)):
        X = sm.add_constant(x)
    else:
        X = sm.add_constant(np.array(x).T)
    y = np.asarray(y)

    fit_ols = sm.OLS(y, X).fit()
    ols_resid = np.asarray(fit_ols.resid)

    # Regress residuals on their lags to estimate the AR(1) coefficient.
    resid_fit = sm.OLS(ols_resid[1:], sm.add_constant(ols_resid[:-1])).fit()
    rho = resid_fit.params[1]

    # The error covariance structure is rho**order, i.e. an
    # autocorrelation structure with Cov(e_i, e_j) proportional to rho**|i-j|.
    order = toeplitz(range(len(ols_resid)))
    sigma = rho**order

    fit_gls = sm.GLS(y, X, sigma=sigma).fit()
    fit_glsar = sm.GLSAR(y, X, 1).iterative_fit(1)
    return fit_gls, fit_glsar
def __init__(self, data, normalize=False, t_value_threshold=2.3, **kwargs):
    # call parent constructor.
    RegressionModel.__init__(self, data, normalize=normalize, **kwargs)

    # placeholders specific to this class.
    self.model = None
    self.trained_model = None
    self.t_value_threshold = t_value_threshold

    # initialise a statsmodels GLS instance.
    self.model = sm.GLS(self.train_y, self.train_x)
def GLS(Y, X, e, r):
    # Estimate the AR(1) coefficient from the residuals e (the `r`
    # argument is unused; sm1 is this module's statsmodels.api alias).
    res_fit = sm1.OLS(e[1:], e[:-1]).fit()
    rho = res_fit.params
    order = toeplitz(np.arange(len(X)))
    sigma = rho**order
    gls_model = sm1.GLS(Y, X, sigma=sigma)
    gls_results = gls_model.fit()
    print(gls_results.summary())
    E = gls_results.resid
    return E
def do_gls(y, X, covariance_matrix):
    '''
    Generalized least squares using the estimated covariance matrix.

    input: data (y, X), estimated covariance matrix
    output: fitted result (coefficients via result.params), residuals
    '''
    gls_mod = sm.GLS(endog=y, exog=X, sigma=covariance_matrix)
    gls_res = gls_mod.fit()
    residual = gls_res.resid
    return gls_res, residual
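# Usage sketch for do_gls with a diagonal (heteroskedastic) covariance,
# in which case GLS reduces to WLS; all numbers are synthetic.
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(1)
X = sm.add_constant(rng.normal(size=50))
noise_var = np.linspace(0.5, 2.0, 50)  # known per-observation variance
y = X @ np.array([0.5, 1.5]) + rng.normal(size=50) * np.sqrt(noise_var)

res, resid = do_gls(y, X, np.diag(noise_var))
print(res.params)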
def LinearRegressionPrediction(mat, minidx, linlag):
    # load dataset
    n = len(mat)
    m = len(mat[0])

    # calculate "latest range"
    startidx = max(minidx - linlag, 0)

    slopes = np.array([0.0] * m)
    slopes2 = np.array([0.0] * m)
    intercepts = np.array([0.0] * m)

    for i in range(0, m):  # per column
        ts = mat[:, i]
        use_SM = False
        slope = 0.0
        intercept = 0.0
        if use_SM:
            # NB: this branch is disabled; without an added constant the
            # GLS fit has no intercept term, and model_fit.scale is the
            # error variance, not an intercept.
            model = sm.GLS(np.array(ts[startidx:minidx]),
                           range(startidx, minidx))
            model_fit = model.fit()
            slope = model_fit.params[0]
            intercept = model_fit.scale
        else:
            model = scistat.linregress(np.array(range(startidx, minidx)),
                                       ts[startidx:minidx])
            slope = model.slope
            intercept = model.intercept

        slopes[i] = slope
        intercepts[i] = intercept

        slope = slope * 1.1
        # Re-anchor the intercept so the prediction continues from the
        # last observed value.
        adjust_intercept = ts[minidx - 1] - ((minidx - 1) * slope + intercept)
        intercept = intercept + adjust_intercept

        # prediction
        for j in range(minidx, n):
            if (j != minidx) and (((j - minidx) % 8) == 0):
                slope *= 0.85
                # The intercept depends on the slope for continuity of the
                # prediction, so it has to be re-adjusted whenever the
                # slope is changed.
                adjust_intercept = ts[j - 1] - ((j - 1) * slope + intercept)
                intercept = intercept + adjust_intercept
            ts[j] = (j * slope) + intercept

        slopes2[i] = slope
        mat[:, i] = ts

    # np.savetxt("/home/zakhar/ReVival/UDF/py_predict/test.txt",
    #            np.array([slopes, slopes2, intercepts]))
    return mat
def gls_model():
    # Generalized Least Squares (GLS)
    data = get_dataset("longley")
    data.exog = sm.add_constant(data.exog)
    ols_resid = sm.OLS(data.endog, data.exog).fit().resid
    res_fit = sm.OLS(ols_resid[1:], ols_resid[:-1]).fit()
    rho = res_fit.params
    # |i - j| lag matrix; the original hard-coded 16, the number of
    # observations in the Longley data.
    order = toeplitz(np.arange(len(ols_resid)))
    sigma = rho**order
    gls = sm.GLS(data.endog, data.exog, sigma=sigma)
    model = gls.fit()
    return ModelWithResults(model=model, alg=gls, inference_dataframe=data.exog)
def solve_GLS(X=None, y=None, sigma=None):
    """
    Solve a multiple linear problem using statsmodels GLS, iteratively
    dropping the most negative coefficient until all remaining
    coefficients are non-negative.
    """
    goForGLS = X.copy()
    while True:
        regr = sm.GLS(y, goForGLS, sigma=sigma).fit()
        # (A p-value criterion, e.g. regr.pvalues > 0.05, would also
        # work here.)
        if (regr.params < 0).any():
            # Some coefficients are negative: drop the worst offender.
            goForGLS.drop(goForGLS.columns[regr.params == min(regr.params)],
                          axis=1, inplace=True)
        else:
            # OK, the run converged.
            break
        if goForGLS.shape[1] == 0:
            # All variables were dropped: the run did not converge.
            print("Warning: The run did not converge...")
            break
    # print(regr.summary())
    return regr
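# Usage sketch for solve_GLS: a DataFrame design with one column whose
# true coefficient is negative, which the loop should drop (toy data).
import numpy as np
import pandas as pd

rng = np.random.default_rng(4)
X = pd.DataFrame({'a': rng.uniform(size=30), 'b': rng.uniform(size=30)})
y = 2 * X['a'] - 0.5 * X['b'] + rng.normal(scale=0.1, size=30)
res = solve_GLS(X=X, y=y, sigma=np.eye(30))
print(res.params)  # only the non-negative coefficients survive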
def getReturn(self):
    # Stack the prior returns and the views into one observation vector.
    self._y = np.append(self._pi, self._q)
    # Block-diagonal covariance: gamma*Sigma for the prior, Omega for views.
    n_prior = self._gammaSigma.shape[0]
    n_views = self._Omega.shape[0]
    self._V = np.zeros((n_prior + n_views, n_prior + n_views))
    self._V[:n_prior, :n_prior] += self._gammaSigma
    self._V[n_prior:, n_prior:] += self._Omega
    # Identity rows for the prior, pick matrix P for the views.
    self._X = np.vstack([np.identity(self._N), self._P])
    # Below is the statsmodels GLS implementation
    import statsmodels.api as sm
    self._fit = sm.GLS(self._y, self._X, sigma=self._V).fit()
    self._ret = self._fit.params
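# A minimal standalone sketch of the same idea (Black-Litterman posterior
# returns as a GLS fit); pi, gamma_sigma, P, q and omega below are toy
# values chosen for illustration.
import numpy as np
import statsmodels.api as sm

pi = np.array([0.04, 0.06])              # equilibrium returns
gamma_sigma = np.array([[0.02, 0.005],
                        [0.005, 0.03]])  # scaled prior covariance
P = np.array([[1.0, -1.0]])              # one relative view
q = np.array([0.01])                     # view value
omega = np.array([[0.01]])               # view uncertainty

y = np.append(pi, q)
X = np.vstack([np.identity(2), P])
V = np.zeros((3, 3))
V[:2, :2] = gamma_sigma
V[2:, 2:] = omega
posterior = sm.GLS(y, X, sigma=V).fit().params
print(posterior)  # posterior expected returns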
def test_fast_scanner_statsmodel_gls():
    import statsmodels.api as sm
    from numpy.linalg import lstsq

    def _lstsq(A, B):
        return lstsq(A, B, rcond=None)[0]

    data = sm.datasets.longley.load()
    data.exog = sm.add_constant(data.exog)
    ols_resid = sm.OLS(data.endog, data.exog).fit().resid
    resid_fit = sm.OLS(ols_resid[1:], sm.add_constant(ols_resid[:-1])).fit()
    rho = resid_fit.params[1]
    order = toeplitz(range(len(ols_resid)))
    sigma = rho ** order

    QS = economic_qs(sigma)
    lmm = LMM(data.endog, data.exog, QS)
    lmm.fit(verbose=False)

    sigma = lmm.covariance()
    scanner = lmm.get_fast_scanner()
    best_beta_se = _lstsq(data.exog.T @ _lstsq(lmm.covariance(), data.exog),
                          eye(7))
    best_beta_se = sqrt(best_beta_se.diagonal())
    assert_allclose(scanner.null_beta_se, best_beta_se, atol=1e-5)

    endog = data.endog.copy()
    endog -= endog.mean(0)
    endog /= endog.std(0)

    exog = data.exog.copy()
    exog -= exog.mean(0)
    with errstate(invalid="ignore", divide="ignore"):
        exog /= exog.std(0)
    exog[:, 0] = 1

    lmm = LMM(endog, exog, QS)
    lmm.fit(verbose=False)

    sigma = lmm.covariance()
    scanner = lmm.get_fast_scanner()

    gls_model = sm.GLS(endog, exog, sigma=sigma)
    gls_results = gls_model.fit()
    beta_se = gls_results.bse
    our_beta_se = sqrt(scanner.null_beta_covariance.diagonal())

    # statsmodels scales the covariance matrix we pass, that is why
    # we need to account for it here.
    assert_allclose(our_beta_se, beta_se / sqrt(gls_results.scale))
    assert_allclose(scanner.null_beta_se, beta_se / sqrt(gls_results.scale))
def best_model_in_the_class(self, combinations):
    helper_dict = {}
    y = self.dependent_variable[list(self.dependent_variable.keys())[0]]
    combinations_dict = {i: combinations[i] for i in range(len(combinations))}
    aic_coef = {i: [] for i in range(len(combinations))}

    # Build the regressor matrix for each candidate combination and fit GLS.
    for key in combinations_dict:
        for i in range(len(self.dict_data['Intel'])):
            helper_dict[i] = [
                v[i] for k, v in self.dict_data.items()
                if k in combinations_dict[key]
            ]
        helper_list = [value for key_, value in sorted(helper_dict.items())]
        model = sm.GLS(y, helper_list)
        regression = model.fit()
        aic_coef[key].append(regression.aic)

    # Map each AIC onto the combination that produced it.
    aic_combinations_dict = {
        float(aic_coef[k][0]): list(combinations_dict[k])
        for k in combinations_dict
    }

    # Choose the best model in the class: the one with the lowest AIC.
    # (An earlier version of this method maximised R^2 instead.)
    best_aic = min(aic_combinations_dict.keys())
    self.best_model_in_class = aic_combinations_dict[best_aic]
    return self.best_model_in_class
def reg_m(y, x, estimator, weights=None):
    # Build the design matrix: the trailing column of ones is the
    # intercept; each additional regressor is stacked in front of it,
    # so coefficients come out in reverse regressor order.
    ones = np.ones(len(x[0]))
    X = sm.add_constant(np.column_stack((x[0], ones)))
    for ele in x[1:]:
        X = sm.add_constant(np.column_stack((ele, X)))
    if estimator == 'ols':
        return sm.OLS(y, X).fit()
    elif estimator == 'wls':
        # sm.WLS defaults to unit weights; passing None would break it.
        return sm.WLS(y, X, weights if weights is not None else 1.0).fit()
    elif estimator == 'gls':
        return sm.GLS(y, X).fit()
    return None
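# Usage sketch for reg_m: two regressors plus the automatically appended
# intercept column (toy data).
import numpy as np

y = [1.0, 2.1, 2.9, 4.2]
x = [[1.0, 2.0, 3.0, 4.0],   # first regressor
     [0.1, 0.0, 0.2, 0.1]]   # second regressor
fit = reg_m(y, x, 'gls')
print(fit.params)  # order: [second regressor, first regressor, intercept]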
def SPGLS(context):
    # Get the relevant data from the Context.
    args = context.args
    # args.inputData is the data sent by the previous node.
    df = args.inputData
    featureColumns = args.featureColumns
    labelColumn = args.labelColumn
    features = df[featureColumns].values
    label = df[labelColumn].values
    # Renamed from arma_mod/arma_res: this is a GLS model, not ARMA.
    gls_mod = sm.GLS(label, features, missing=args.missing)
    gls_res = gls_mod.fit(method=args.method)
    return gls_res
def getReward(self):
    """Computes the metric for our reward: the adjusted R^2 of a GLS fit
    restricted to the currently selected features."""
    # pdb.set_trace()
    X = self.X.T[self.features[0]].T
    if X.shape[1] == 0:
        return 0
    results = sm.GLS(self.Y, X).fit()
    # (A commented-out manual computation of the estimated weights, R^2
    # and adjusted R^2 via np.linalg has been removed in favour of the
    # statsmodels result.)
    return results.rsquared_adj
def calibrate(volume_duration, strikes, reps):
    volume_duration = volume_duration.unstack(['Half-spread', 'Strike'])
    arrival_rate = volume_duration.groupby('Class').apply(
        lambda c: compute_arrival_rate(c.loc[c.name, 'Volume'],
                                       c.loc[c.name, 'Duration'],
                                       strikes[c.name]))
    arrival_rate.name = 'Arrival rate'
    arrival_rate.index = arrival_rate.index.reorder_levels(
        ['Class', 'Strike', 'Half-spread'])

    # Stationary bootstrap per class, for confidence intervals and the
    # covariance of the arrival-rate estimates.
    sbs = volume_duration.groupby('Class').apply(
        lambda c: StationaryBootstrap(25,
                                      volume=c.loc[c.name, 'Volume'],
                                      duration=c.loc[c.name, 'Duration']))
    conf_int = sbs.groupby('Class').apply(lambda c: pd.DataFrame(
        c[c.name].conf_int(
            lambda volume, duration: compute_arrival_rate(
                volume, duration, strikes[c.name]),
            reps=reps),
        ['2.5%', '97.5%'],
        arrival_rate.loc[c.name].index))
    conf_int = conf_int.T.stack('Class')
    conf_int.index = conf_int.index.reorder_levels(
        ['Class', 'Strike', 'Half-spread'])

    sigma = sbs.groupby('Class').apply(lambda c: pd.DataFrame(
        c[c.name].cov(
            lambda volume, duration: compute_arrival_rate(
                volume, duration, strikes[c.name]),
            reps=reps),
        arrival_rate.loc[c.name].index,
        arrival_rate.loc[c.name].index))
    sigma = sigma.groupby('Strike').apply(
        lambda k: k.xs(k.name, level='Strike', axis=1))
    sigma.dropna(how='all', inplace=True)

    # GLS of the arrival rate (apparently on a log scale, given the
    # exponentiation below) on half-spread, per (Class, Strike), using
    # the bootstrap covariance as sigma.
    gls = arrival_rate.loc[sigma.index].groupby(['Class', 'Strike']).apply(
        lambda k: sm.GLS(
            k.values,
            sm.add_constant(k.index.get_level_values('Half-spread')),
            sigma=sigma.xs(k.name, level=['Class', 'Strike']).dropna(axis=1)
        ).fit())
    params = gls.apply(lambda g: pd.Series(
        [np.exp(g.params[0]), -g.params[1]], ['A', '$\\kappa$']))
    base_conf_int = gls.apply(lambda g: pd.Series(
        np.exp(g.conf_int(alpha=.1)[0]), ['A 5%', 'A 95%']))
    decay_conf_int = gls.apply(lambda g: pd.Series(
        -g.conf_int(alpha=.1)[1, ::-1], ['$\\kappa$ 5%', '$\\kappa$ 95%']))
    params = pd.concat([params, base_conf_int, decay_conf_int], axis=1)
    arrival_rate = np.exp(pd.concat([arrival_rate, conf_int], axis=1))
    return arrival_rate, params
def gls(data, xseq, **params):
    """
    Fit GLS
    """
    X = sm.add_constant(data['x'])
    Xseq = sm.add_constant(xseq)

    results = sm.GLS(data['y'], X).fit(**params['method_args'])
    data = pd.DataFrame({'x': xseq})
    data['y'] = results.predict(Xseq)

    if params['se']:
        alpha = 1 - params['level']
        prstd, iv_l, iv_u = wls_prediction_std(results, Xseq, alpha=alpha)
        data['se'] = prstd
        data['ymin'] = iv_l
        data['ymax'] = iv_u

    return data
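# Usage sketch for the smoother above; wls_prediction_std lives in
# statsmodels.sandbox.regression.predstd, and the params keywords mirror
# what a plotting layer would pass (values assumed, data synthetic).
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.sandbox.regression.predstd import wls_prediction_std

rng = np.random.default_rng(2)
raw = pd.DataFrame({'x': np.linspace(0, 10, 40)})
raw['y'] = 2 * raw['x'] + rng.normal(scale=2, size=40)
smoothed = gls(raw, xseq=np.linspace(0, 10, 100),
               se=True, level=0.95, method_args={})
print(smoothed[['x', 'y', 'ymin', 'ymax']].head())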
def LL(X, Y, Xs, Ys, error):
    # Local-linear estimate of the error surface on the (Xs, Ys) grid,
    # using a product quartic (biweight) kernel with bandwidth h.
    # (The original also computed unused u1/u2 terms, removed here.)
    n = len(X)
    h = 0.1
    mean_of_error = np.zeros((len(Xs), len(Ys)))
    for i in range(len(Xs)):
        for j in range(len(Ys)):
            k = (0.9375 * (1 - ((X - Xs[i]) / h)**2)**2) \
                * (0.9375 * (1 - ((Y - Ys[j]) / h)**2)**2)
            K = np.diag(k)
            indep = np.column_stack([np.ones(n), X - Xs[i], Y - Ys[j]])
            dep = np.asarray(error)
            # NB: GLS treats sigma as an error *covariance*, so points with
            # large kernel weight k are effectively down-weighted here; for
            # a kernel-weighted fit, sm.WLS(dep, indep, weights=k) may be
            # the intended model.
            gls_model = sm.GLS(dep, indep, sigma=K)
            gls_results = gls_model.fit()
            # The local intercept is the estimate at the grid node.
            mean_of_error[i, j] = gls_results.params[0]
    return mean_of_error
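# Usage sketch for LL on a small synthetic grid (all values made up).
# Note the biweight kernel above is not truncated at |u| > 1, so points
# far from a grid node still receive positive (and large) weight.
import numpy as np

rng = np.random.default_rng(3)
X = rng.uniform(size=100)
Y = rng.uniform(size=100)
error = rng.normal(scale=0.1, size=100)
Xs = np.linspace(0.2, 0.8, 5)
Ys = np.linspace(0.2, 0.8, 5)
surface = LL(X, Y, Xs, Ys, error)  # 5 x 5 grid of local intercepts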