def summary(self, yname=None, xname=None, title=0, alpha=.05,
            return_fmt='text'):
    """
    Build the summary tables for a robust linear model (IRLS) fit.

    Parameters
    ----------
    yname : optional
        Forwarded to both summary tables as ``yname``.
    xname : optional
        Forwarded to both summary tables as ``xname``.
    title : optional
        Any value other than None (including the default ``0``) is
        replaced by "Robust linear Model Regression Results". None is
        forwarded to the top table unchanged.
    alpha : float
        Significance level forwarded to the parameter table.
    return_fmt : str
        Not used by this method.

    Returns
    -------
    smry : Summary instance
        Holds the top table, the parameter table and a note about the
        fit options.
    """
    top_left = [
        ('Dep. Variable:', None),
        ('Model:', None),
        ('Method:', ['IRLS']),
        ('Norm:', [self.fit_options['norm']]),
        ('Scale Est.:', [self.fit_options['scale_est']]),
        ('Cov Type:', [self.fit_options['cov']]),
        ('Date:', None),
        ('Time:', None),
        ('No. Iterations:', ["%d" % self.fit_history['iteration']]),
    ]
    top_right = [
        ('No. Observations:', None),
        ('Df Residuals:', None),
        ('Df Model:', None),
    ]

    if title is not None:
        title = "Robust linear Model Regression Results"

    # boiler plate
    from statsmodels.iolib.summary import Summary
    smry = Summary()
    smry.add_table_2cols(self, gleft=top_left, gright=top_right,
                         yname=yname, xname=xname, title=title)
    # Pass the caller's alpha through; it used to be pinned to .05 here,
    # which silently ignored the argument.
    smry.add_table_params(self, yname=yname, xname=xname, alpha=alpha,
                          use_t=False)

    # add warnings/notes, added to text format only
    wstr = ("If the model instance has been used for another fit "
            "with different fit\n"
            "parameters, then the fit options might not be the correct "
            "ones anymore .")
    smry.add_extra_txt([wstr])

    return smry
def summary(self, yname=None, xname=None, title=0, alpha=0.05,
            return_fmt='text'):
    """
    Build the summary tables for a quantile regression fit.

    Parameters
    ----------
    yname : optional
        Forwarded to both summary tables as ``yname``.
    xname : optional
        Forwarded to both summary tables as ``xname``.
    title : optional
        Any value other than None (including the default ``0``) is
        replaced by "Quantile Regression Results". None is forwarded to
        the top table unchanged.
    alpha : float
        Significance level forwarded to the parameter table.
    return_fmt : str
        Not used by this method.

    Returns
    -------
    smry : Summary instance
        Holds the top table and the parameter table.
    """
    # Only Summary is needed; the previously imported summary_top,
    # summary_params and summary_return helpers were never used.
    from statsmodels.iolib.summary import Summary

    top_left = [
        ('Dep. Variable:', None),
        ('Model:', None),
        ('Method:', ['Interior Point']),
        ('Date:', None),
        ('Time:', None),
    ]
    top_right = [
        ('No. Observations:', None),
        ('Df Residuals:', None),
        ('Df Model:', None),
        ('Tau:', ["%.3f" % self.fit_history['tau']]),
        ('Iterations:', ["%d" % self.fit_history['iterations']]),
    ]

    if title is not None:
        title = "Quantile Regression Results"

    smry = Summary()
    smry.add_table_2cols(self, gleft=top_left, gright=top_right,
                         yname=yname, xname=xname, title=title)
    smry.add_table_params(self, yname=yname, xname=xname, alpha=alpha,
                          use_t=False)

    return smry
def summary(self, yname=None, xname=None, title=None, alpha=.05,
            yname_list=None):
    """
    Build the summary tables for a maximum-likelihood fit.

    Parameters
    ----------
    yname : optional
        Forwarded to the top table as ``yname``.
    xname : optional
        Forwarded to both tables as ``xname``.
    title : optional
        Title for the top table. When None, the model class name followed
        by "Regression Results" is used.
    alpha : float
        Significance level forwarded to the parameter table.
    yname_list : optional
        Forwarded to the parameter table as ``yname``.

    Returns
    -------
    smry : Summary instance
        Holds the top table and the parameter table.
    """
    model_label = self.model.__class__.__name__
    converged_text = "%s" % self.mle_retvals['converged']

    header_left = [
        ('Dep. Variable:', None),
        ('Model:', [model_label]),
        ('Method:', ['MLE']),
        ('Date:', None),
        ('Time:', None),
        ('Converged:', [converged_text]),
    ]
    header_right = [
        ('No. Observations:', None),
        ('Log-Likelihood:', None),
    ]

    if title is None:
        title = model_label + ' Regression Results'

    from statsmodels.iolib.summary import Summary
    smry = Summary()
    # header table
    smry.add_table_2cols(self, gleft=header_left, gright=header_right,
                         yname=yname, xname=xname, title=title)
    # parameter table
    smry.add_table_params(self, yname=yname_list, xname=xname,
                          alpha=alpha, use_t=True)
    return smry
def summary(self, yname=None, xname=None, title=0, alpha=0.05,
            return_fmt="text"):
    """
    Build the summary tables for a robust linear model (IRLS) fit.

    Parameters
    ----------
    yname : optional
        Forwarded to both summary tables as ``yname``.
    xname : optional
        Forwarded to both summary tables as ``xname``.
    title : optional
        Any value other than None (including the default ``0``) is
        replaced by "Robust linear Model Regression Results". None is
        forwarded to the top table unchanged.
    alpha : float
        Significance level forwarded to the parameter table.
    return_fmt : str
        Not used by this method.

    Returns
    -------
    smry : Summary instance
        Holds the top table, the parameter table and a note about the
        fit options.
    """
    # Only Summary is needed; the previously imported summary_top,
    # summary_params and summary_return helpers were never used.
    from statsmodels.iolib.summary import Summary

    top_left = [
        ("Dep. Variable:", None),
        ("Model:", None),
        ("Method:", ["IRLS"]),
        ("Norm:", [self.fit_options["norm"]]),
        ("Scale Est.:", [self.fit_options["scale_est"]]),
        ("Cov Type:", [self.fit_options["cov"]]),
        ("Date:", None),
        ("Time:", None),
        ("No. Iterations:", ["%d" % self.fit_history["iteration"]]),
    ]
    top_right = [
        ("No. Observations:", None),
        ("Df Residuals:", None),
        ("Df Model:", None),
    ]

    if title is not None:
        title = "Robust linear Model Regression Results"

    smry = Summary()
    smry.add_table_2cols(self, gleft=top_left, gright=top_right,
                         yname=yname, xname=xname, title=title)
    smry.add_table_params(self, yname=yname, xname=xname, alpha=alpha,
                          use_t=self.use_t)

    # add warnings/notes, added to text format only
    wstr = ("If the model instance has been used for another fit "
            "with different fit\n"
            "parameters, then the fit options might not be the correct "
            "ones anymore .")
    # The note is always present, so no emptiness check is needed.
    smry.add_extra_txt([wstr])

    return smry
def summary(self, yname=None, xname=None, title=0, alpha=.05,
            return_fmt='text'):
    """
    Assemble the summary for a robust linear model (IRLS) fit.

    Parameters
    ----------
    yname, xname : optional
        Forwarded unchanged to both summary tables.
    title : optional
        None is forwarded as is; every other value (the default ``0``
        included) is replaced by the fixed RLM title.
    alpha : float
        Significance level forwarded to the parameter table.
    return_fmt : str
        Not used by this method.

    Returns
    -------
    smry : Summary instance
        Top table, parameter table and a note about the fit options.
    """
    options = self.fit_options
    left_rows = [
        ('Dep. Variable:', None),
        ('Model:', None),
        ('Method:', ['IRLS']),
        ('Norm:', [options['norm']]),
        ('Scale Est.:', [options['scale_est']]),
        ('Cov Type:', [options['cov']]),
        ('Date:', None),
        ('Time:', None),
    ]
    left_rows.append(
        ('No. Iterations:', ["%d" % self.fit_history['iteration']]))
    right_rows = [(label, None) for label in
                  ('No. Observations:', 'Df Residuals:', 'Df Model:')]

    table_title = (None if title is None
                   else "Robust linear Model Regression Results")

    from statsmodels.iolib.summary import Summary
    smry = Summary()
    smry.add_table_2cols(self, gleft=left_rows, gright=right_rows,
                         yname=yname, xname=xname, title=table_title)
    smry.add_table_params(self, yname=yname, xname=xname, alpha=alpha,
                          use_t=self.use_t)

    # note shown in the text format only
    note = ("If the model instance has been used for another fit "
            "with different fit\n"
            "parameters, then the fit options might not be the correct "
            "ones anymore .")
    smry.add_extra_txt([note])
    return smry
def summary(self, yname=None, xname=None, title=0, alpha=.05,
            return_fmt='text'):
    """
    Produce the summary object for a robust linear model (IRLS) fit.

    Parameters
    ----------
    yname, xname : optional
        Passed through to both summary tables.
    title : optional
        Unless None is given, the title is always set to
        "Robust linear Model Regression Results"; None is passed on.
    alpha : float
        Significance level passed to the parameter table.
    return_fmt : str
        Not used by this method.

    Returns
    -------
    smry : Summary instance
        Contains the top table, the parameter table and one extra note.
    """
    fit_opts = self.fit_options
    norm_name = fit_opts['norm']
    scale_name = fit_opts['scale_est']
    cov_name = fit_opts['cov']
    iterations = "%d" % self.fit_history['iteration']

    gleft = [('Dep. Variable:', None),
             ('Model:', None),
             ('Method:', ['IRLS']),
             ('Norm:', [norm_name]),
             ('Scale Est.:', [scale_name]),
             ('Cov Type:', [cov_name]),
             ('Date:', None),
             ('Time:', None),
             ('No. Iterations:', [iterations])]
    gright = [('No. Observations:', None),
              ('Df Residuals:', None),
              ('Df Model:', None)]

    if title is None:
        heading = None
    else:
        heading = "Robust linear Model Regression Results"

    from statsmodels.iolib.summary import Summary
    smry = Summary()
    smry.add_table_2cols(self, gleft=gleft, gright=gright, yname=yname,
                         xname=xname, title=heading)
    smry.add_table_params(self, yname=yname, xname=xname, alpha=alpha,
                          use_t=self.use_t)

    # extra text is only rendered in the text format
    extra = [
        "If the model instance has been used for another fit "
        "with different fit\n"
        "parameters, then the fit options might not be the correct "
        "ones anymore ."
    ]
    smry.add_extra_txt(extra)
    return smry
def summary(self, yname=None, xname=None, title=0, alpha=0.05,
            return_fmt='text'):
    """
    Build the summary tables for a nonlinear quantile regression fit.

    Parameters
    ----------
    yname : optional
        Forwarded to both summary tables as ``yname``.
    xname : optional
        Forwarded to both summary tables as ``xname``.
    title : optional
        Any value other than None (including the default ``0``) is
        replaced by "Nonlinear Quantile Regression Results". None is
        forwarded to the top table unchanged.
    alpha : float
        Significance level forwarded to the parameter table.
    return_fmt : str
        Not used by this method.

    Returns
    -------
    smry : Summary instance
        Holds the top table and the parameter table.
    """
    # Only Summary is needed; the previously imported summary_top,
    # summary_params and summary_return helpers were never used.
    from statsmodels.iolib.summary import Summary

    top_left = [
        ('Dep. Variable:', None),
        ('Model:', None),
        ('Method:', ['Interior Point']),
        ('Date:', None),
        ('Time:', None),
    ]
    top_right = [
        ('No. Observations:', None),
        ('Df Residuals:', None),
        ('Df Model:', None),
        ('Outer Iterations:',
         ["%d" % self.fit_history['outer_iterations']]),
        ('Avg. Inner Iterations:',
         ["%d" % self.fit_history['avg_inner_iterations']]),
    ]

    if title is not None:
        title = "Nonlinear Quantile Regression Results"

    smry = Summary()
    smry.add_table_2cols(self, gleft=top_left, gright=top_right,
                         yname=yname, xname=xname, title=title)
    smry.add_table_params(self, yname=yname, xname=xname, alpha=alpha,
                          use_t=False)

    return smry
def summary(self, yname=None, xname=None, title=None, alpha=.05,
            yname_list=None):
    """
    Summarize a maximum-likelihood fit in two tables.

    Parameters
    ----------
    yname : optional
        Passed to the top table as ``yname``.
    xname : optional
        Passed to both tables as ``xname``.
    title : optional
        Title of the top table; defaults to the model class name
        followed by "Regression Results" when None.
    alpha : float
        Significance level passed to the parameter table.
    yname_list : optional
        Passed to the parameter table as ``yname``.

    Returns
    -------
    smry : Summary instance
        Holds the top table and the parameter table.
    """
    class_name = self.model.__class__.__name__

    gleft = [('Dep. Variable:', None),
             ('Model:', [class_name]),
             ('Method:', ['MLE']),
             ('Date:', None),
             ('Time:', None)]
    gleft.append(('Converged:', ["%s" % self.mle_retvals['converged']]))
    gright = [(label, None)
              for label in ('No. Observations:', 'Log-Likelihood:')]

    heading = title
    if heading is None:
        heading = ' '.join([class_name, "Regression Results"])

    from statsmodels.iolib.summary import Summary
    smry = Summary()
    # top of the summary
    smry.add_table_2cols(self, gleft=gleft, gright=gright, yname=yname,
                         xname=xname, title=heading)
    # parameter estimates
    smry.add_table_params(self, yname=yname_list, xname=xname,
                          alpha=alpha, use_t=True)
    return smry
def summary(self, yname=None, xname=None, title=None, alpha=.05):
    """Summarize the Regression Results

    Parameters
    ----------
    yname : string, optional
        Default is `y`
    xname : list of strings, optional
        Default is `var_##` for ## in p the number of regressors
    title : string, optional
        Title for the top table. If not None, then this replaces the
        default title
    alpha : float
        significance level for the confidence intervals

    Returns
    -------
    smry : Summary instance
        this holds the summary tables and text, which can be printed or
        converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary : class to hold summary results
    """
    eigvals = self.eigenvals
    condno = self.condition_number

    top_left = [
        ('Dep. Variable:', None),
        ('Model:', None),
        ('Method:', ['Least Squares']),
        ('Date:', None),
        ('Time:', None),
    ]
    top_right = [
        ('Pseudo R-squared:', ["%#8.4g" % self.prsquared]),
        ('Bandwidth:', ["%#8.4g" % self.bandwidth]),
        ('Sparsity:', ["%#8.4g" % self.sparsity]),
        ('No. Observations:', None),
        ('Df Residuals:', None),
        ('Df Model:', None),
    ]

    if title is None:
        title = self.model.__class__.__name__ + ' ' + "Regression Results"

    # create summary table instance
    from statsmodels.iolib.summary import Summary
    smry = Summary()
    smry.add_table_2cols(self, gleft=top_left, gright=top_right,
                         yname=yname, xname=xname, title=title)
    # Honor the caller's alpha; it used to be pinned to .05 here, so the
    # documented parameter had no effect on the confidence intervals.
    smry.add_table_params(self, yname=yname, xname=xname, alpha=alpha,
                          use_t=True)

    # add warnings/notes, added to text format only
    etext = []
    if eigvals[-1] < 1e-10:
        wstr = ("The smallest eigenvalue is %6.3g. This might indicate "
                "that there are\n"
                "strong multicollinearity problems or that the design "
                "matrix is singular.")
        etext.append(wstr % eigvals[-1])
    elif condno > 1000:  # TODO: what is recommended
        wstr = ("The condition number is large, %6.3g. This might "
                "indicate that there are\n"
                "strong multicollinearity or other numerical "
                "problems.")
        etext.append(wstr % condno)

    if etext:
        smry.add_extra_txt(etext)

    return smry
def summary(
    self,
    alpha=0.05,
    start=None,
    title=None,
    model_name=None,
    display_params=True,
):
    """
    Summarize the Model

    Parameters
    ----------
    alpha : float, optional
        Significance level for the confidence intervals. Default is 0.05.
    start : int, optional
        Integer of the start observation. Default is 0.
    title : str, optional
        Title of the top table. Default is "Statespace Model Results".
    model_name : str or list, optional
        The name of the model used. Default is to use model class name.
        When a list is given, the entries after the first are shown on
        additional rows prefixed with "+ ".
    display_params : bool, optional
        Whether the parameter table is included (it is also left out when
        there are no parameters). Default is True.

    Returns
    -------
    summary : Summary instance
        This holds the summary table and text, which can be printed or
        converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary
    """
    from statsmodels.iolib.summary import Summary

    def _month_day_year(stamp):
        # Render an index entry as MM-DD-YY style text.
        return "%02d-%02d-%02d" % (stamp.month, stamp.day, stamp.year)

    def _two_decimals(values):
        # One cell holding all values, comma separated, two decimals each.
        return [", ".join(["{0:.2f}".format(v) for v in values])]

    # Model specification results
    model = self.model
    if title is None:
        title = "Statespace Model Results"

    if start is None:
        start = 0
    if self.model._index_dates:
        index = self.model._index
        sample = [_month_day_year(index[start])]
        sample.append("- " + _month_day_year(index[-1]))
    else:
        sample = [str(start), " - " + str(self.nobs)]

    if model_name is None:
        model_name = model.__class__.__name__

    # Diagnostic test results; a failing test is shown as NaN entries
    try:
        het = self.test_heteroskedasticity(method="breakvar")
    except Exception:  # FIXME: catch something specific
        het = np.array([[np.nan] * 2])
    try:
        lb = self.test_serial_correlation(method="ljungbox")
    except Exception:  # FIXME: catch something specific
        lb = np.array([[np.nan] * 2]).reshape(1, 2, 1)
    try:
        jb = self.test_normality(method="jarquebera")
    except Exception:  # FIXME: catch something specific
        jb = np.array([[np.nan] * 4])

    # Standardize the model name as a list
    if not isinstance(model_name, list):
        model_name = [model_name]

    # Create the tables
    top_left = [
        ("Dep. Variable:", None),
        ("Model:", [model_name[0]]),
    ]
    top_left.extend(("", ["+ " + extra]) for extra in model_name[1:])
    top_left.extend([
        ("Date:", None),
        ("Time:", None),
        ("Sample:", [sample[0]]),
        ("", [sample[1]]),
    ])

    top_right = [
        ("No. Observations:", [self.nobs]),
        ("Log Likelihood", ["%#5.3f" % self.llf]),
    ]
    if hasattr(self, "rsquared"):
        top_right.append(("R-squared:", ["%#8.3f" % self.rsquared]))
    for label, value in (("AIC", self.aic),
                         ("BIC", self.bic),
                         ("HQIC", self.hqic)):
        top_right.append((label, ["%#5.3f" % value]))

    # The scale row is shown when there are no filter results at all, or
    # when the filter results exist and used a concentrated scale.
    if not hasattr(self, "filter_results"):
        show_scale = True
    else:
        filter_results = self.filter_results
        show_scale = (filter_results is not None
                      and filter_results.filter_concentrated)
    if show_scale:
        top_right.append(("Scale", ["%#5.3f" % self.scale]))

    if hasattr(self, "cov_type"):
        top_left.append(("Covariance Type:", [self.cov_type]))

    diagn_left = [
        ("Ljung-Box (Q):", _two_decimals(lb[:, 0, -1])),
        ("Prob(Q):", _two_decimals(lb[:, 1, -1])),
        ("Heteroskedasticity (H):", _two_decimals(het[:, 0])),
        ("Prob(H) (two-sided):", _two_decimals(het[:, 1])),
    ]
    diagn_right = [
        ("Jarque-Bera (JB):", _two_decimals(jb[:, 0])),
        ("Prob(JB):", _two_decimals(jb[:, 1])),
        ("Skew:", _two_decimals(jb[:, 2])),
        ("Kurtosis:", _two_decimals(jb[:, 3])),
    ]

    summary = Summary()
    summary.add_table_2cols(self, gleft=top_left, gright=top_right,
                            title=title)
    if display_params and len(self.params) > 0:
        summary.add_table_params(self, alpha=alpha,
                                 xname=self.param_names, use_t=False)
    summary.add_table_2cols(self, gleft=diagn_left, gright=diagn_right,
                            title="")

    # Add warnings/notes, added to text format only
    notes = []
    if hasattr(self, "cov_type") and "description" in self.cov_kwds:
        notes.append(self.cov_kwds["description"])
    n_free = len(self.params) - len(self.fixed_params)
    if self._rank < n_free:
        cov_params = self.cov_params()
        if len(self.fixed_params) > 0:
            # keep only the rows/columns of the free parameters
            mask = np.ix_(self._free_params_index,
                          self._free_params_index)
            cov_params = cov_params[mask]
        notes.append("Covariance matrix is singular or near-singular,"
                     " with condition number %6.3g. Standard errors may be"
                     " unstable." % _safe_cond(cov_params))

    if notes:
        numbered = ["[{0}] {1}".format(pos, text)
                    for pos, text in enumerate(notes, 1)]
        summary.add_extra_txt(["Warnings:"] + numbered)

    return summary
def summary(self, alpha=.05, start=None, model_name=None):
    """
    Summarize the Model

    Parameters
    ----------
    alpha : float, optional
        Significance level for the confidence intervals. Default is 0.05.
    start : int, optional
        Integer of the start observation. Default is 0.
    model_name : string
        The name of the model used. Default is to use model class name.

    Returns
    -------
    summary : Summary instance
        This holds the summary table and text, which can be printed or
        converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary
    """
    from statsmodels.iolib.summary import Summary
    model = self.model
    title = 'Statespace Model Results'

    if start is None:
        start = 0
    if self.data.dates is not None:
        dates = self.data.dates
        d = dates[start]
        sample = ['%02d-%02d-%02d' % (d.month, d.day, d.year)]
        d = dates[-1]
        sample += ['- ' + '%02d-%02d-%02d' % (d.month, d.day, d.year)]
    else:
        sample = [str(start), ' - ' + str(self.model.nobs)]

    if model_name is None:
        model_name = model.__class__.__name__

    top_left = [
        ('Dep. Variable:', None),
        ('Model:', [model_name]),
        ('Date:', None),
        ('Time:', None),
        ('Sample:', [sample[0]]),
        ('', [sample[1]]),
    ]
    top_right = [
        ('No. Observations:', [self.model.nobs]),
        ('Log Likelihood', ["%#5.3f" % self.llf]),
        ('AIC', ["%#5.3f" % self.aic]),
        ('BIC', ["%#5.3f" % self.bic]),
        ('HQIC', ["%#5.3f" % self.hqic]),
    ]

    if hasattr(self, 'cov_type'):
        top_left.append(('Covariance Type:', [self.cov_type]))

    summary = Summary()
    summary.add_table_2cols(self, gleft=top_left, gright=top_right,
                            title=title)
    summary.add_table_params(self, alpha=alpha, xname=self._param_names,
                             use_t=False)

    # Add warnings/notes, added to text format only
    etext = []
    # Only add the covariance note when a description is actually
    # present; indexing the key unconditionally raised KeyError for
    # results whose cov_kwds carry no 'description' entry.
    if hasattr(self, 'cov_type') and 'description' in self.cov_kwds:
        etext.append(self.cov_kwds['description'])

    if etext:
        etext = ["[{0}] {1}".format(i + 1, text)
                 for i, text in enumerate(etext)]
        etext.insert(0, "Warnings:")
        summary.add_extra_txt(etext)

    return summary
def summary(self, alpha=.05):
    """Summarize the Model

    Parameters
    ----------
    alpha : float, optional
        Significance level for the confidence intervals.

    Returns
    -------
    smry : Summary instance
        This holds the summary table and text, which can be printed or
        converted to various output formats. When the model has at least
        one AR lag, a "Roots" table is appended as well.

    See Also
    --------
    statsmodels.iolib.summary.Summary
    """
    from statsmodels.iolib.table import SimpleTable

    model = self.model
    title = model.__class__.__name__ + ' Model Results'
    method = model.method

    # get sample
    start = 0 if 'mle' in method else self.k_ar
    if self.data.dates is not None:
        dates = self.data.dates
        sample = [dates[start].strftime('%m-%d-%Y')]
        sample += ['- ' + dates[-1].strftime('%m-%d-%Y')]
    else:
        # Must be a two-element list like the dated branch: with a single
        # string, sample[0] / sample[1] below would pick out individual
        # characters instead of the start and end labels.
        sample = [str(start), ' - ' + str(len(self.data.orig_endog))]

    k_ar = self.k_ar
    order = '({0})'.format(k_ar)
    dep_name = str(self.model.endog_names)
    top_left = [
        # Wrapped in a list like every other row value, so a name longer
        # than one character stays a single table cell.
        ('Dep. Variable:', [dep_name]),
        ('Model:', [model.__class__.__name__ + order]),
        ('Method:', [method]),
        ('Date:', None),
        ('Time:', None),
        ('Sample:', [sample[0]]),
        ('', [sample[1]]),
    ]
    top_right = [
        ('No. Observations:', [str(len(self.model.endog))]),
        ('Log Likelihood', ["%#5.3f" % self.llf]),
        ('S.D. of innovations', ["%#5.3f" % self.sigma2 ** .5]),
        ('AIC', ["%#5.3f" % self.aic]),
        ('BIC', ["%#5.3f" % self.bic]),
        ('HQIC', ["%#5.3f" % self.hqic]),
    ]

    smry = Summary()
    smry.add_table_2cols(self, gleft=top_left, gright=top_right,
                         title=title)
    smry.add_table_params(self, alpha=alpha, use_t=False)

    # Make the roots table; an AR(0) model has no roots to report
    stubs = ["AR.%d" % i for i in range(1, k_ar + 1)] if k_ar else []
    if stubs:
        roots = self.roots
        freq = self.arfreq
        modulus = np.abs(roots)
        data = np.column_stack((roots.real, roots.imag, modulus, freq))
        roots_table = SimpleTable(
            [('%17.4f' % row[0],
              '%+17.4fj' % row[1],
              '%17.4f' % row[2],
              '%17.4f' % row[3]) for row in data],
            headers=[' Real', ' Imaginary', ' Modulus', ' Frequency'],
            title="Roots",
            stubs=stubs)
        smry.tables.append(roots_table)

    return smry
def summary(self, yname=None, xname=None, title=None, alpha=.05):
    """
    Summarize the fitted model.

    Parameters
    ----------
    yname : str, optional
        Default is `y`
    xname : list[str], optional
        Names for the exogenous variables, default is "var_xx".
        Must match the number of parameters in the model
    title : str, optional
        Title for the top table. If not None, then this replaces
        the default title
    alpha : float
        Significance level for the confidence intervals

    Returns
    -------
    smry : Summary instance
        This holds the summary tables and text, which can be
        printed or converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary : class to hold summary results
    """
    header_left = [('Dep. Variable:', None),
                   ('Model:', None),
                   ('Log-Likelihood:', None),
                   ('Method:', [self.method]),
                   ('Date:', None),
                   ('Time:', None)]

    header_right = [('No. Observations:', None),
                    ('No. groups:', [self.n_groups])]
    # _group_stats is read positionally: entries 0, 1, 2 feed the
    # min / max / mean rows in that order.
    size_labels = ('Min group size:', 'Max group size:',
                   'Mean group size:')
    for position, label in enumerate(size_labels):
        header_right.append((label, [self._group_stats[position]]))

    heading = title
    if heading is None:
        heading = "Conditional Logit Model Regression Results"

    # build the two tables
    from statsmodels.iolib.summary import Summary
    smry = Summary()
    smry.add_table_2cols(self, gleft=header_left, gright=header_right,
                         yname=yname, xname=xname, title=heading)
    smry.add_table_params(self, yname=yname, xname=xname, alpha=alpha,
                          use_t=self.use_t)

    return smry
def summary(self):
    """
    Summarize the fitted Model

    Returns
    -------
    smry : Summary instance
        This holds the summary table and text, which can be printed or
        converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary
    """
    from statsmodels.iolib.summary import Summary
    from statsmodels.iolib.table import SimpleTable

    model = self.model
    model_label = model.__class__.__name__
    title = model_label + " Model Results"

    # Name of the dependent variable, taken from the original data when
    # it is a pandas object; otherwise the generic label is used.
    source = self.model.data.orig_endog
    if isinstance(source, pd.DataFrame):
        dep_variable = source.columns[0]
    elif isinstance(source, pd.Series):
        dep_variable = source.name
    else:
        dep_variable = "endog"

    if self.model.seasonal is None:
        seasonal_periods = None
    else:
        seasonal_periods = self.model.seasonal_periods

    component_names = {
        "add": "Additive",
        "additive": "Additive",
        "mul": "Multiplicative",
        "multiplicative": "Multiplicative",
        None: "None",
    }

    transform = self.params["use_boxcox"]
    box_cox_transform = True if transform else False
    if isinstance(transform, str):
        box_cox_coeff = transform
    else:
        box_cox_coeff = self.params["lamda"]
    if isinstance(box_cox_coeff, float):
        box_cox_coeff = "{:>10.5f}".format(box_cox_coeff)

    top_left = [
        ("Dep. Variable:", [dep_variable]),
        ("Model:", [model_label]),
        ("Optimized:", [str(np.any(self.optimized))]),
        ("Trend:", [component_names[self.model.trend]]),
        ("Seasonal:", [component_names[self.model.seasonal]]),
        ("Seasonal Periods:", [str(seasonal_periods)]),
        ("Box-Cox:", [str(box_cox_transform)]),
        ("Box-Cox Coeff.:", [str(box_cox_coeff)]),
    ]
    top_right = [("No. Observations:", [str(len(self.model.endog))])]
    for label, value in (("SSE", self.sse),
                         ("AIC", self.aic),
                         ("BIC", self.bic),
                         ("AICC", self.aicc)):
        top_right.append((label, ["{:5.3f}".format(value)]))
    top_right += [("Date:", None), ("Time:", None)]

    smry = Summary()
    smry.add_table_2cols(self, gleft=top_left, gright=top_right,
                         title=title)
    formatted = self.params_formatted  # type: pd.DataFrame

    def _fmt(value):
        # Right-align in 20 columns; very large or very small magnitudes
        # use general format, everything else fixed-point with up to
        # seven decimals.
        magnitude = np.abs(value)
        exponent = int(np.log10(magnitude)) if magnitude != 0 else 1
        if not -3 <= exponent <= 4:
            return "{:>20.5g}".format(value)
        decimals = min(7 - exponent, 7)
        return "{{:>20.{0}f}}".format(decimals).format(value)

    # The frame's columns are read by position: 0, 1 and 2 feed the
    # "code", "coeff" and "optimized" cells respectively.
    rows = [
        [
            _fmt(record.iloc[1]),
            "{0:>20}".format(record.iloc[0]),
            "{0:>20}".format(str(bool(record.iloc[2]))),
        ]
        for _, record in formatted.iterrows()
    ]
    params_table = SimpleTable(
        rows,
        headers=["coeff", "code", "optimized"],
        title="",
        stubs=list(formatted.index),
    )
    smry.tables.append(params_table)

    return smry
def summary(self, yname=None, xname=None, title=None, alpha=.05):
    """
    Summarize the QIF regression results

    Parameters
    ----------
    yname : string, optional
        Default is the model's `endog_names`
    xname : list of strings, optional
        Default is the model's `exog_names`
    title : string, optional
        Title for the top table. If not None, then this replaces
        the default title
    alpha : float
        significance level for the confidence intervals

    Returns
    -------
    smry : Summary instance
        this holds the summary tables and text, which can be
        printed or converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary : class to hold summary results
    """
    model = self.model

    header_left = [
        ('Dep. Variable:', None),
        ('Method:', ['QIF']),
        ('Family:', [model.family.__class__.__name__]),
        ('Covariance structure:',
         [model.cov_struct.__class__.__name__]),
        ('Date:', None),
        ('Time:', None),
    ]

    # one entry per element of groups_ix, holding that element's length
    cluster_sizes = [len(members) for members in model.groups_ix]
    header_right = [
        ('No. Observations:', [sum(cluster_sizes)]),
        ('No. clusters:', [len(cluster_sizes)]),
        ('Min. cluster size:', [min(cluster_sizes)]),
        ('Max. cluster size:', [max(cluster_sizes)]),
        ('Mean cluster size:', ["%.1f" % np.mean(cluster_sizes)]),
        ('Scale:', ["%.3f" % self.scale]),
    ]

    if title is None:
        title = self.model.__class__.__name__ + ' Regression Results'

    # Fall back to the model's own variable names when the caller did
    # not provide any.
    if xname is None:
        xname = self.model.exog_names
    if yname is None:
        yname = self.model.endog_names

    from statsmodels.iolib.summary import Summary
    smry = Summary()
    smry.add_table_2cols(self, gleft=header_left, gright=header_right,
                         yname=yname, xname=xname, title=title)
    smry.add_table_params(self, yname=yname, xname=xname, alpha=alpha,
                          use_t=False)

    return smry
def summary(self):
    """
    Summarize the fitted Model

    Returns
    -------
    smry : Summary instance
        This holds the summary table and text, which can be printed or
        converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary
    """
    from statsmodels.iolib.summary import Summary
    from statsmodels.iolib.table import SimpleTable

    model = self.model
    class_name = model.__class__.__name__
    title = class_name + ' Model Results'

    # Use the pandas column / series name when the model's endog is a
    # pandas object; otherwise keep the generic label.
    dep_variable = 'endog'
    if isinstance(self.model.endog, pd.DataFrame):
        dep_variable = self.model.endog.columns[0]
    elif isinstance(self.model.endog, pd.Series):
        dep_variable = self.model.endog.name

    if self.model.seasonal is None:
        seasonal_periods = None
    else:
        seasonal_periods = self.model.seasonal_periods

    lookup = dict.fromkeys(('add', 'additive'), 'Additive')
    lookup.update(dict.fromkeys(('mul', 'multiplicative'),
                                'Multiplicative'))
    lookup[None] = 'None'

    transform = self.params['use_boxcox']
    box_cox_transform = True if transform else False
    if isinstance(transform, string_types):
        box_cox_coeff = transform
    else:
        box_cox_coeff = self.params['lamda']
    if isinstance(box_cox_coeff, float):
        box_cox_coeff = '{:>10.5f}'.format(box_cox_coeff)

    top_left = [
        ('Dep. Variable:', [dep_variable]),
        ('Model:', [class_name]),
        ('Optimized:', [str(np.any(self.optimized))]),
        ('Trend:', [lookup[self.model.trend]]),
        ('Seasonal:', [lookup[self.model.seasonal]]),
        ('Seasonal Periods:', [str(seasonal_periods)]),
        ('Box-Cox:', [str(box_cox_transform)]),
        ('Box-Cox Coeff.:', [str(box_cox_coeff)]),
    ]

    three_decimals = '{:5.3f}'.format
    top_right = [
        ('No. Observations:', [str(len(self.model.endog))]),
        ('SSE', [three_decimals(self.sse)]),
        ('AIC', [three_decimals(self.aic)]),
        ('BIC', [three_decimals(self.bic)]),
        ('AICC', [three_decimals(self.aicc)]),
        ('Date:', None),
        ('Time:', None),
    ]

    smry = Summary()
    smry.add_table_2cols(self, gleft=top_left, gright=top_right,
                         title=title)
    formatted = self.params_formatted  # type: pd.DataFrame

    def _fmt(number):
        # 20-wide, right-aligned: general format outside the exponent
        # range [-3, 4], fixed-point with at most 7 decimals inside it.
        size = np.abs(number)
        if size != 0:
            scale = int(np.log10(size))
        else:
            scale = 1
        if scale < -3 or scale > 4:
            return '{:>20.5g}'.format(number)
        places = min(7 - scale, 7)
        template = '{{:>20.{0}f}}'.format(places)
        return template.format(number)

    tab = []
    for _, row in formatted.iterrows():
        # positional columns: 1 -> coeff, 0 -> code, 2 -> optimized flag
        coeff_cell = _fmt(row.iloc[1])
        code_cell = '{0:>20}'.format(row.iloc[0])
        flag_cell = '{0:>20}'.format(str(bool(row.iloc[2])))
        tab.append([coeff_cell, code_cell, flag_cell])

    params_table = SimpleTable(tab,
                               headers=['coeff', 'code', 'optimized'],
                               title="",
                               stubs=list(formatted.index))
    smry.tables.append(params_table)

    return smry
def get_statsmodels_summary(self, title=None, alpha=.05):
    """
    Build a statsmodels-style summary of the estimation results.

    Parameters
    ----------
    title : str or None, optional
        Title of the returned summary. If None, the default title
        (the model type followed by "Regression Results") is used.
    alpha : float, optional
        Between 0.0 and 1.0. Determines the width of the displayed
        (1 - alpha)% confidence interval.

    Returns
    -------
    statsmodels Summary object. When statsmodels cannot be imported, a
    message is printed and the result of ``self.print_summaries()`` is
    returned instead.
    """
    # Only the import is guarded: the fallback message is about a missing
    # statsmodels installation, so unrelated errors (or KeyboardInterrupt)
    # must not be reported as "statsmodels not installed".
    try:
        from statsmodels.iolib.summary import Summary
    except ImportError:
        print("statsmodels not installed. Resorting to standard summary")
        return self.print_summaries()

    smry = Summary()

    # The dependent variable is named after the choice column; no list
    # of names is supplied for the parameter table.
    new_yname, new_yname_list = self.choice_col, None

    model_name = self.model_type

    # The table layout below follows statsmodels.discrete.discrete_model
    top_left = [
        ('Dep. Variable:', None),
        ('Model:', [model_name]),
        ('Method:', ['MLE']),
        ('Date:', None),
        ('Time:', None),
        ('converged:', [str(self.estimation_success)]),
    ]
    top_right = [
        ('No. Observations:', ["{:,}".format(self.nobs)]),
        ('Df Residuals:', ["{:,}".format(self.df_resid)]),
        ('Df Model:', ["{:,}".format(self.df_model)]),
        ('Pseudo R-squ.:', ["{:.3f}".format(self.rho_squared)]),
        ('Pseudo R-bar-squ.:', ["{:.3f}".format(self.rho_bar_squared)]),
        ('Log-Likelihood:', ["{:,.3f}".format(self.llf)]),
        ('LL-Null:', ["{:,.3f}".format(self.null_log_likelihood)]),
    ]

    if title is None:
        title = model_name + ' ' + "Regression Results"

    xnames = self.params.index.tolist()

    # for top of table
    smry.add_table_2cols(self, gleft=top_left, gright=top_right,
                         yname=new_yname, xname=xnames, title=title)
    # for parameters, etc
    smry.add_table_params(self, yname=[new_yname_list], xname=xnames,
                          alpha=alpha, use_t=False)
    return smry
def summary(self, impact_date=None, impacted_variable=None,
            update_date=None, updated_variable=None,
            impacts_groupby='impact date', details_groupby='update date',
            show_revisions_columns=None, sparsify=True,
            include_details_tables=None, include_revisions_tables=False,
            float_format='%.2f'):
    """
    Create summary tables describing news and impacts

    Parameters
    ----------
    impact_date : int, str, datetime, list, array, or slice, optional
        Observation index label or slice of labels selecting the impact
        periods to display, i.e. the periods in which impacted variables
        were *affected* by the news. Forwarded to `summary_impacts` and
        `summary_details`. The original documentation notes that the value
        is used with the Pandas `loc` accessor and so should correspond to
        the labels of the model's index (zero-based integers when the model
        was built from a list or numpy array).
    impacted_variable : str, list, array, or slice, optional
        Observation variable label or slice of labels selecting the
        impacted variables to display. If you do not know the labels,
        check the `endog_names` attribute of the model instance.
    update_date : int, str, datetime, list, array, or slice, optional
        Observation index label or slice of labels selecting the periods
        in which new data points became available. NOTE: accepted for
        interface compatibility; this body does not forward it to any of
        the table builders.
    updated_variable : str, list, array, or slice, optional
        Observation variable label or slice of labels selecting the
        updated variables. NOTE: accepted for interface compatibility;
        this body does not forward it to any of the table builders.
    impacts_groupby : {impact date, impacted date}
        The primary variable for grouping results in the impacts table.
        The default is 'impact date'.
    details_groupby : str
        One of "update date", "updated date", "impact date", or
        "impacted date". The primary variable for grouping results in the
        details table. The default is 'update date'.
    show_revisions_columns : bool, optional
        Forwarded to `summary_impacts`. Per the original documentation,
        False hides the revision and total impact columns; the default
        shows them only if any revisions were made.
    sparsify : bool, optional, default True
        Set to False for the tables to include every one of the multiindex
        keys at each row.
    include_details_tables : bool, optional
        Whether to add the tables describing how news from specific
        updates translate into specific impacts. The default is to include
        them only for univariate models (``k_endog == 1``).
    include_revisions_tables : bool, optional
        If True, and there is at least one revision, a table describing
        the revisions is added.
    float_format : str, optional
        Formatter format string syntax for converting numbers to strings.
        Default is '%.2f'.

    Returns
    -------
    summary_tables : Summary
        Summary tables describing news and impacts, in this order: a
        general information table, the impacts table, the news table (when
        there are updates), the details table(s) (when requested) and the
        revisions table (when requested and revisions exist).

    See Also
    --------
    summary_impacts
    summary_details
    summary_revisions
    summary_updates
    """
    # Detailed tables default to "on" only for univariate models.
    if include_details_tables is None:
        include_details_tables = self.updated.model.k_endog == 1

    def describe_sample(mod):
        # Two strings: the first label and a "- last" style suffix.
        if not mod._index_dates:
            return [str(0), ' - ' + str(mod.nobs)]
        index = mod._index
        return ['%s' % index[0], '- ' + '%s' % index[-1]]

    underlying = self.model.model
    previous_sample = describe_sample(self.previous.model)
    revised_sample = describe_sample(self.updated.model)
    model_name = underlying.__class__.__name__

    # General information table.
    header_left = [('Model:', [model_name]),
                   ('Date:', None),
                   ('Time:', None)]
    header_right = [
        ('Original sample:', [previous_sample[0]]),
        ('', [previous_sample[1]]),
        # Drop the first two characters (the "- " prefix of the date form).
        ('Update through:', [revised_sample[1][2:]]),
        ('No. Revisions:', [len(self.revisions_ix)]),
        ('No. New datapoints:', [len(self.updates_ix)])]

    summary = Summary()
    # Copy the variable names onto the results wrapper before building the
    # header table (presumably read by add_table_2cols -- TODO confirm).
    self.model.endog_names = self.model.model.endog_names
    summary.add_table_2cols(self, gleft=header_left, gright=header_right,
                            title='News')

    # Tables that follow the header, in display order.
    extra_tables = [self.summary_impacts(
        impact_date=impact_date, impacted_variable=impacted_variable,
        groupby=impacts_groupby,
        show_revisions_columns=show_revisions_columns, sparsify=sparsify,
        float_format=float_format)]

    if len(self.updates_iloc) > 0:
        extra_tables.append(self.summary_news(sparsify=sparsify))

    # summary_details returns several tables for multivariate models and a
    # single table otherwise.
    multiple_tables = self.updated.model.k_endog > 1
    details = self.summary_details(
        impact_date=impact_date, impacted_variable=impacted_variable,
        groupby=details_groupby, sparsify=sparsify,
        float_format=float_format, multiple_tables=multiple_tables)
    if include_details_tables:
        extra_tables.extend(details if multiple_tables else [details])

    if include_revisions_tables and len(self.revisions_iloc) > 0:
        extra_tables.append(self.summary_revisions(sparsify=sparsify))

    # Position 0 is left to the header table.
    for position, table in enumerate(extra_tables, start=1):
        summary.tables.insert(position, table)

    return summary
def get_statsmodels_summary(self, title=None, alpha=.05):
    """
    Build a statsmodels ``Summary`` describing the estimation results.

    Parameters
    ----------
    title : str or None, optional
        Title of the returned summary. If None, the default title
        ``"<model_type> Regression Results"`` is used.
    alpha : float, optional
        Between 0.0 and 1.0. Forwarded to ``Summary.add_table_params``;
        determines the width of the displayed (1 - alpha) confidence
        interval.

    Returns
    -------
    Summary
        The populated summary. If statsmodels cannot be imported, a notice
        is printed and the result of ``self.print_summaries()`` is returned
        instead.
    """
    # Guard the import alone. The fallback message only makes sense for a
    # missing package; other failures must not be hidden behind it.
    try:
        from statsmodels.iolib.summary import Summary
    except ImportError:
        print("statsmodels not installed. Resorting to standard summary")
        return self.print_summaries()

    smry = Summary()

    # Dependent-variable name; no per-equation name list is available.
    new_yname, new_yname_list = self.choice_col, None
    model_name = self.model_type

    # The table layout follows statsmodels.discrete.discrete_model.
    top_left = [
        ('Dep. Variable:', None),
        ('Model:', [model_name]),
        ('Method:', ['MLE']),
        ('Date:', None),
        ('Time:', None),
        ('converged:', [str(self.estimation_success)]),
    ]

    top_right = [
        ('No. Observations:', ["{:,}".format(self.nobs)]),
        ('Df Residuals:', ["{:,}".format(self.df_resid)]),
        ('Df Model:', ["{:,}".format(self.df_model)]),
        ('Pseudo R-squ.:', ["{:.3f}".format(self.rho_squared)]),
        ('Pseudo R-bar-squ.:', ["{:.3f}".format(self.rho_bar_squared)]),
        ('Log-Likelihood:', ["{:,.3f}".format(self.llf)]),
        ('LL-Null:', ["{:,.3f}".format(self.null_log_likelihood)]),
    ]

    if title is None:
        title = model_name + ' ' + "Regression Results"

    xnames = self.params.index.tolist()

    # Header table.
    smry.add_table_2cols(self, gleft=top_left, gright=top_right,
                         yname=new_yname, xname=xnames, title=title)
    # Parameter table.
    smry.add_table_params(self, yname=[new_yname_list], xname=xnames,
                          alpha=alpha, use_t=False)
    return smry
def summary(self, yname=None, xname=None, title=None, alpha=.05):
    """
    Summarize the Regression Results

    Parameters
    -----------
    yname : string, optional
        Name of the dependent variable, forwarded to both tables.
        Default is `y`
    xname : list of strings, optional
        Names of the regressors, forwarded to both tables. Default is
        `var_##` for ## in p the number of regressors
    title : string, optional
        Title for the top table. If None, "Generalized Linear Model
        Regression Results" is used.
    alpha : float
        significance level for the confidence intervals

    Returns
    -------
    smry : Summary instance
        this holds the summary tables and text, which can be printed or
        converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary : class to hold summary results
    """
    # Pre-format the model-specific cells; rows whose value is None are
    # left for the Summary table builder to fill in.
    family_label = self.family.__class__.__name__
    link_label = self.family.link.__class__.__name__
    iterations = "%d" % self.fit_history['iteration']
    scale_value = self.scale
    deviance_txt = "%#8.5g" % self.deviance
    chi2_txt = "%#6.3g" % self.pearson_chi2

    left_rows = [
        ('Dep. Variable:', None),
        ('Model:', None),
        ('Model Family:', [family_label]),
        ('Link Function:', [link_label]),
        ('Method:', ['IRLS']),
        ('Date:', None),
        ('Time:', None),
        ('No. Iterations:', [iterations]),
    ]
    right_rows = [
        ('No. Observations:', None),
        ('Df Residuals:', None),
        ('Df Model:', None),
        ('Scale:', [scale_value]),
        ('Log-Likelihood:', None),
        ('Deviance:', [deviance_txt]),
        ('Pearson chi2:', [chi2_txt]),
    ]

    heading = title
    if heading is None:
        heading = "Generalized Linear Model Regression Results"

    from statsmodels.iolib.summary import Summary
    smry = Summary()
    smry.add_table_2cols(self, gleft=left_rows, gright=right_rows,
                         yname=yname, xname=xname, title=heading)
    smry.add_table_params(self, yname=yname, xname=xname, alpha=alpha,
                          use_t=True)
    # No diagnostic table is produced.
    return smry
def summary(self):
    """
    Summarize the fitted Model

    Builds a two-column header table (model settings on the left, sample
    size and fit statistics on the right) followed by a parameter table
    with one row per entry of ``self.params_formatted``.

    Returns
    -------
    smry : Summary instance
        This holds the summary table and text, which can be printed or
        converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary
    """
    from statsmodels.iolib.summary import Summary
    from statsmodels.iolib.table import SimpleTable

    model = self.model
    title = model.__class__.__name__ + ' Model Results'

    # Use the pandas label of the data when there is one.
    dep_variable = 'endog'
    if isinstance(self.model.endog, pd.DataFrame):
        dep_variable = self.model.endog.columns[0]
    elif isinstance(self.model.endog, pd.Series):
        dep_variable = self.model.endog.name

    seasonal_periods = (None if self.model.seasonal is None
                        else self.model.seasonal_periods)

    # Display names for the trend/seasonal component codes.
    lookup = {
        'add': 'Additive',
        'additive': 'Additive',
        'mul': 'Multiplicative',
        'multiplicative': 'Multiplicative',
        None: 'None',
    }

    transform = self.params['use_boxcox']
    box_cox_transform = True if transform else False
    # A string setting is shown as given; otherwise show the stored lambda.
    box_cox_coeff = (transform if isinstance(transform, str)
                     else self.params['lamda'])
    if isinstance(box_cox_coeff, float):
        box_cox_coeff = '{:>10.5f}'.format(box_cox_coeff)

    top_left = [('Dep. Variable:', [dep_variable]),
                ('Model:', [model.__class__.__name__]),
                ('Optimized:', [str(np.any(self.optimized))]),
                ('Trend:', [lookup[self.model.trend]]),
                ('Seasonal:', [lookup[self.model.seasonal]]),
                ('Seasonal Periods:', [str(seasonal_periods)]),
                ('Box-Cox:', [str(box_cox_transform)]),
                ('Box-Cox Coeff.:', [str(box_cox_coeff)])]

    top_right = [('No. Observations:', [str(len(self.model.endog))]),
                 ('SSE', ['{:5.3f}'.format(self.sse)]),
                 ('AIC', ['{:5.3f}'.format(self.aic)]),
                 ('BIC', ['{:5.3f}'.format(self.bic)]),
                 ('AICC', ['{:5.3f}'.format(self.aicc)]),
                 ('Date:', None),
                 ('Time:', None)]

    smry = Summary()
    smry.add_table_2cols(self, gleft=top_left, gright=top_right,
                         title=title)
    formatted = self.params_formatted  # type: pd.DataFrame

    def _fmt(x):
        """Right-align ``x`` in 20 columns with magnitude-based precision."""
        abs_x = np.abs(x)
        scale = 1
        # NaN/inf have no usable order of magnitude: int(log10(...)) would
        # raise on them, so they keep the default scale and render as
        # 'nan'/'inf' through the fixed-point branch.
        if np.isfinite(abs_x) and abs_x != 0:
            scale = int(np.log10(abs_x))
        if scale > 4 or scale < -3:
            # Very large or very small values: general format.
            return '{:>20.5g}'.format(x)
        dec = min(7 - scale, 7)
        fmt = '{{:>20.{0}f}}'.format(dec)
        return fmt.format(x)

    # Columns are read by position: 0 -> code, 1 -> value, 2 -> optimized.
    tab = [[_fmt(vals.iloc[1]),
            '{0:>20}'.format(vals.iloc[0]),
            '{0:>20}'.format(str(bool(vals.iloc[2])))]
           for _, vals in formatted.iterrows()]

    params_table = SimpleTable(tab, headers=['coeff', 'code', 'optimized'],
                               title="", stubs=list(formatted.index))
    smry.tables.append(params_table)
    return smry
def summary(self, alpha=.05, start=None, model_name=None):
    """
    Summarize the Model

    Parameters
    ----------
    alpha : float, optional
        Significance level for the confidence intervals. Default is 0.05.
    start : int, optional
        Integer of the start observation. Default is 0.
    model_name : string
        The name of the model used. Default is to use model class name.

    Returns
    -------
    summary : Summary instance
        This holds the summary table and text, which can be printed or
        converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary
    """
    from statsmodels.iolib.summary import Summary

    model = self.model
    first_obs = 0 if start is None else start

    # Describe the sample either by dates (month-day-year) or by
    # observation positions when no dates are attached.
    dates = self.data.dates
    if dates is None:
        sample_from = str(first_obs)
        sample_to = ' - ' + str(self.model.nobs)
    else:
        date_fmt = '%02d-%02d-%02d'
        begin = dates[first_obs]
        sample_from = date_fmt % (begin.month, begin.day, begin.year)
        end = dates[-1]
        sample_to = '- ' + date_fmt % (end.month, end.day, end.year)

    label = model.__class__.__name__ if model_name is None else model_name

    left_rows = [
        ('Dep. Variable:', None),
        ('Model:', [label]),
        ('Date:', None),
        ('Time:', None),
        ('Sample:', [sample_from]),
        ('', [sample_to]),
    ]
    right_rows = [
        ('No. Observations:', [self.model.nobs]),
        ('Log Likelihood', ["%#5.3f" % self.llf]),
        ('AIC', ["%#5.3f" % self.aic]),
        ('BIC', ["%#5.3f" % self.bic]),
        ('HQIC', ["%#5.3f" % self.hqic]),
    ]
    # The covariance row is only shown when the attribute exists.
    if hasattr(self, 'cov_type'):
        left_rows.append(('Covariance Type:', [self.cov_type]))

    summary = Summary()
    summary.add_table_2cols(self, gleft=left_rows, gright=right_rows,
                            title='Statespace Model Results')
    summary.add_table_params(self, alpha=alpha,
                             xname=self.data.param_names, use_t=False)

    # Notes appended below the tables, numbered from 1.
    notes = []
    if hasattr(self, 'cov_type'):
        notes.append(self.cov_kwds['description'])
    if notes:
        numbered = ["[{0}] {1}".format(number, note)
                    for number, note in enumerate(notes, start=1)]
        summary.add_extra_txt(["Warnings:"] + numbered)

    return summary
def summary(self, yname=None, xname=None, title=None, alpha=.05):
    """Summarize the Regression Results

    Parameters
    -----------
    yname : string, optional
        Name of the dependent variable, passed on to both tables.
        Default is `y`
    xname : list of strings, optional
        Names of the regressors, passed on to both tables. Default is
        `var_##` for ## in p the number of regressors
    title : string, optional
        Title for the top table. When None, the title is
        "Generalized Linear Model Regression Results".
    alpha : float
        significance level for the confidence intervals

    Returns
    -------
    smry : Summary instance
        this holds the summary tables and text, which can be printed or
        converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary : class to hold summary results
    """
    # (label, value) pairs; a None value is left to the table builder.
    info_rows = (
        ('Dep. Variable:', None),
        ('Model:', None),
        ('Model Family:', [self.family.__class__.__name__]),
        ('Link Function:', [self.family.link.__class__.__name__]),
        ('Method:', ['IRLS']),
        ('Date:', None),
        ('Time:', None),
        ('No. Iterations:', ["%d" % self.fit_history['iteration']]),
    )
    stat_rows = (
        ('No. Observations:', None),
        ('Df Residuals:', None),
        ('Df Model:', None),
        ('Scale:', [self.scale]),
        ('Log-Likelihood:', None),
        ('Deviance:', ["%#8.5g" % self.deviance]),
        ('Pearson chi2:', ["%#6.3g" % self.pearson_chi2]),
    )
    top_left = list(info_rows)
    top_right = list(stat_rows)

    default_title = "Generalized Linear Model Regression Results"
    shown_title = default_title if title is None else title

    from statsmodels.iolib.summary import Summary
    names = dict(yname=yname, xname=xname)
    smry = Summary()
    smry.add_table_2cols(self, gleft=top_left, gright=top_right,
                         title=shown_title, **names)
    smry.add_table_params(self, alpha=alpha, use_t=True, **names)
    # No diagnostic table is produced.
    return smry
def summary(self, yname=None, xname=None, title=None, alpha=.05):
    """Summarize the Regression Results

    Parameters
    -----------
    yname : string, optional
        Name of the dependent variable, forwarded to both tables.
        Default is `y`
    xname : list of strings, optional
        Names of the regressors, forwarded to both tables. Default is
        `var_##` for ## in p the number of regressors
    title : string, optional
        Title for the top table. If None, the title is the model class
        name followed by "Regression Results".
    alpha : float
        significance level for the confidence intervals; forwarded to the
        parameter table.

    Returns
    -------
    smry : Summary instance
        this holds the summary tables and text, which can be printed or
        converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary : class to hold summary results
    """
    # Both values feed the numerical warnings at the end. They are bound
    # explicitly here so the checks below can never hit an undefined name.
    eigvals = self.eigenvals
    condno = self.condition_number

    top_left = [('Dep. Variable:', None),
                ('Model:', None),
                ('Method:', ['Least Squares']),
                ('Date:', None),
                ('Time:', None),
                ]

    top_right = [('Pseudo R-squared:', ["%#8.4g" % self.prsquared]),
                 ('Bandwidth:', ["%#8.4g" % self.bandwidth]),
                 ('Sparsity:', ["%#8.4g" % self.sparsity]),
                 ('No. Observations:', None),
                 ('Df Residuals:', None),
                 ('Df Model:', None),
                 ]

    if title is None:
        title = self.model.__class__.__name__ + ' ' + "Regression Results"

    # Residual diagnostics (omnibus, Jarque-Bera, Durbin-Watson) are not
    # part of this summary; only the two tables below are built.
    from statsmodels.iolib.summary import Summary
    smry = Summary()
    smry.add_table_2cols(self, gleft=top_left, gright=top_right,
                         yname=yname, xname=xname, title=title)
    # Honour the caller's significance level instead of a fixed .05.
    smry.add_table_params(self, yname=yname, xname=xname, alpha=alpha,
                          use_t=True)

    # Warnings/notes, added to text format only. The eigenvalue check has
    # priority; the condition number is only reported when it passes.
    etext = []
    if eigvals[-1] < 1e-10:
        wstr = ("The smallest eigenvalue is %6.3g. This might indicate "
                "that there are\n"
                "strong multicollinearity problems or that the design "
                "matrix is singular.")
        etext.append(wstr % eigvals[-1])
    elif condno > 1000:  # TODO: what is recommended
        wstr = ("The condition number is large, %6.3g. This might "
                "indicate that there are\n"
                "strong multicollinearity or other numerical "
                "problems.")
        etext.append(wstr % condno)

    if etext:
        smry.add_extra_txt(etext)

    return smry
def summary(self, alpha=.05):
    """
    Returns a summary table for marginal effects

    Parameters
    ----------
    alpha : float
        Number between 0 and 1. The confidence intervals have the
        probability 1-alpha.

    Returns
    -------
    Summary : SummaryTable
        A SummaryTable instance
    """
    _check_at_is_all(self.margeff_options)
    results = self.results
    model = results.model
    title = model.__class__.__name__ + " Marginal Effects"
    method = self.margeff_options['method']
    header_rows = [('Dep. Variable:', [model.endog_names]),
                   ('Method:', [method]),
                   ('At:', [self.margeff_options['at']])]

    from statsmodels.iolib.summary import (Summary, summary_params,
                                           table_extend)
    # Work on a copy so the model's own name list is left untouched.
    exog_names = model.exog_names[:]
    smry = Summary()

    # Drop the constant's name when the design matrix has a constant.
    _, const_idx = _get_const_index(model.exog)
    if const_idx is not None:
        exog_names.pop(const_idx)

    # Models exposing J > 1 get one parameter table per equation.
    n_eq = int(getattr(model, "J", 1))
    if n_eq > 1:
        yname, yname_list = results._get_endog_name(model.endog_names,
                                                    None, all=True)
    else:
        yname = model.endog_names
        yname_list = [yname]

    smry.add_table_2cols(self, gleft=header_rows, gright=[],
                         yname=yname, xname=exog_names, title=title)

    # add_table_params is not general enough for marginal effects, so the
    # parameter table is assembled from summary_params directly.
    conf_int = self.conf_int(alpha)
    margeff = self.margeff
    margeff_se = self.margeff_se
    tvalues = self.tvalues
    pvalues = self.pvalues

    def column_header():
        # The coefficient column is labelled with the method's name.
        return ['', _transform_names[method], 'std err', 'z',
                'P>|z|', '[%3.1f%% Conf. Int.]' % (100-alpha*100)]

    def equation_table(eq):
        # Slice out equation `eq` and build its titled table.
        stats = (results, margeff[:, eq], margeff_se[:, eq],
                 tvalues[:, eq], pvalues[:, eq], conf_int[:, :, eq])
        part = summary_params(stats, yname=yname_list[eq],
                              xname=exog_names, alpha=alpha, use_t=False,
                              skip_header=True)
        part.title = yname_list[eq]
        part.insert_header_row(0, column_header())
        return part

    if n_eq > 1:
        table = table_extend([equation_table(eq) for eq in range(n_eq)],
                             keep_headers=True)
    else:
        stats = (results, margeff, margeff_se, tvalues, pvalues, conf_int)
        table = summary_params(stats, yname=yname, xname=exog_names,
                               alpha=alpha, use_t=False, skip_header=True)
        table.insert_header_row(0, column_header())

    smry.tables.append(table)
    return smry
def summary(self, yname=None, xname=None, title=None, alpha=.05):
    """
    Summarize the Regression Results

    Parameters
    -----------
    yname : string, optional
        Accepted for interface compatibility. NOTE: this implementation
        labels the tables with ``self.yname``; the argument is not read.
    xname : list of strings, optional
        Accepted for interface compatibility. NOTE: this implementation
        labels the tables with ``self.xname``; the argument is not read.
    title : string, optional
        Title for the top table. If None, "Generalized Linear Model
        Regression Results" is used.
    alpha : float
        significance level for the confidence intervals; forwarded to the
        parameter table.

    Returns
    -------
    smry : Summary instance
        this holds the summary tables and text, which can be printed or
        converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary : class to hold summary results
    """
    top_left = [('Dep. Variable:', None),
                ('Model:', [self.model]),
                ('Model Family:', [self.family_name]),
                ('Link Function:', [self.family_link]),
                ('Method:', [self.method]),
                ('Covariance Type:', [self.cov_type]),
                ('No. Observations:', [self.nobs])]

    # fit_history is formatted with %d, i.e. it is treated as a count here.
    top_right = [('No. Iterations:', ["%d" % self.fit_history]),
                 ('Df Residuals:', [self.df_resid]),
                 ('Df Model:', [self.df_model]),
                 ('Scale:', ["%#8.5g" % self.scale]),
                 ('Log-Likelihood:', ["%#8.5g" % self.llf]),
                 ('Deviance:', ["%#8.5g" % self.deviance]),
                 ('Pearson chi2:', ["%#6.3g" % self.pearson_chi2])]

    if title is None:
        title = "Generalized Linear Model Regression Results"

    # Variable names come from the instance, not from the arguments.
    y = self.yname
    x = self.xname

    from statsmodels.iolib.summary import Summary
    smry = Summary()
    smry.add_table_2cols(self, gleft=top_left, gright=top_right,
                         yname=y, xname=x, title=title)
    # Use the requested significance level rather than a fixed 0.05.
    smry.add_table_params(self, yname=y, xname=x, alpha=alpha, use_t=True)

    if hasattr(self, 'constraints'):
        smry.add_extra_txt([
            'Model has been estimated subject to linear '
            'equality constraints.'
        ])

    # No diagnostic table is produced.
    return smry
def summary(self, yname=None, xname=None, title=None, alpha=.05):
    """
    Summarize the fitted model.

    Parameters
    ----------
    yname : string, optional
        Name of the dependent variable, forwarded to both tables.
        Default is `y`
    xname : list of strings, optional
        Names of the regressors, forwarded to both tables. Default is
        `var_##` for ## in p the number of regressors
    title : string, optional
        Title for the top table. When None, "Conditional Logit Model
        Regression Results" is used.
    alpha : float
        significance level for the confidence intervals

    Returns
    -------
    smry : Summary instance
        this holds the summary tables and text, which can be printed or
        converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary : class to hold summary results
    """
    # Rows with a None value are left to the Summary table builder.
    left_rows = [(label, None) for label in
                 ('Dep. Variable:', 'Model:', 'Log-Likelihood:')]
    left_rows.append(('Method:', [self.method]))
    left_rows.extend([('Date:', None), ('Time:', None)])

    right_rows = [('No. Observations:', None),
                  ('No. groups:', [self.n_groups])]
    # _group_stats is read by position: 0 -> min, 1 -> max, 2 -> mean.
    size_labels = ('Min group size:', 'Max group size:',
                   'Mean group size:')
    right_rows.extend((label, [self._group_stats[pos]])
                      for pos, label in enumerate(size_labels))

    heading = ("Conditional Logit Model Regression Results"
               if title is None else title)

    from statsmodels.iolib.summary import Summary
    smry = Summary()
    smry.add_table_2cols(self, gleft=left_rows, gright=right_rows,
                         yname=yname, xname=xname, title=heading)
    smry.add_table_params(self, yname=yname, xname=xname, alpha=alpha,
                          use_t=self.use_t)
    return smry
def summary(self, yname=None, xname=None, title=None, alpha=.05):
    """Summarize the Regression Results

    Parameters
    ----------
    yname : string, optional
        Name of the dependent variable, forwarded to both tables.
        Default is `y`
    xname : list of strings, optional
        Names of the regressors, forwarded to both tables. Default is
        `var_##` for ## in p the number of regressors
    title : string, optional
        Title for the top table. When None, the title is the model class
        name followed by "Regression Results".
    alpha : float
        significance level for the confidence intervals

    Returns
    -------
    smry : Summary instance
        this holds the summary tables and text, which can be printed or
        converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary : class to hold summary results
    """
    def fixed(value, digits):
        # Fixed-point text with the given number of decimals.
        return format(value, '.%df' % digits)

    # The 'Model:', 'Date:' and 'Time:' rows are disabled in this summary.
    # self.obs is read by position: 1 -> uncensored, 2 -> left-censored,
    # 3 -> right-censored counts (as the row labels indicate).
    left_rows = [
        ('Dep. Variable:', None),
        ('Method:', ['Maximum Likelihood']),
        ('No. Observations:', None),
        ('No. Uncensored Obs:', [fixed(self.obs[1], 0)]),
        ('No. Left-censored Obs:', [fixed(self.obs[2], 0)]),
        ('No. Right-censored Obs:', [fixed(self.obs[3], 0)]),
        ('Df Residuals:', None),
        ('Df Model:', None),
    ]

    right_rows = [
        ('Pseudo R-squ:', [fixed(self.prsquared, 3)]),
        ('Log-Likelihood:', [fixed(self.llf, 1)]),
        ('LL-Null:', [fixed(self.llnull, 1)]),
        ('LL-Ratio:', [fixed(self.llr, 1)]),
        ('LLR p-value:', [fixed(self.llr_pvalue, 3)]),
        ('AIC:', [fixed(self.aic, 1)]),
        ('BIC:', [fixed(self.bic, 1)]),
        ('Covariance Type:', [self.cov_type]),
    ]

    heading = title
    if heading is None:
        heading = self.model.__class__.__name__ + ' ' + "Regression Results"

    from statsmodels.iolib.summary import Summary
    smry = Summary()
    smry.add_table_2cols(self, gleft=left_rows, gright=right_rows,
                         yname=yname, xname=xname, title=heading)
    smry.add_table_params(self, yname=yname, xname=xname, alpha=alpha,
                          use_t=self.use_t)
    return smry
def summary(self, yname=None, xname=None, title=None, alpha=.05):
    """Summarize the Regression Results

    Parameters
    ----------
    yname : str, optional
        Name of the dependent variable, forwarded to both tables.
        Default is `y`
    xname : list[str], optional
        Names for the exogenous variables. Default is `var_##` for ## in
        the number of regressors. Must match the number of parameters
        in the model
    title : str, optional
        Title for the top table. When None, the title is the model class
        name followed by "Regression Results".
    alpha : float
        significance level for the confidence intervals

    Returns
    -------
    smry : Summary instance
        this holds the summary tables and text, which can be printed or
        converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary : class to hold summary results
    """
    # Inputs for the numerical warnings appended at the end.
    eigvals = self.eigenvals
    condno = self.condition_number

    left_rows = [('Dep. Variable:', None),
                 ('Model:', None),
                 ('Method:', ['Least Squares']),
                 ('Date:', None),
                 ('Time:', None)]
    right_rows = [('Pseudo R-squared:', ["%#8.4g" % self.prsquared]),
                  ('Bandwidth:', ["%#8.4g" % self.bandwidth]),
                  ('Sparsity:', ["%#8.4g" % self.sparsity]),
                  ('No. Observations:', None),
                  ('Df Residuals:', None),
                  ('Df Model:', None)]

    heading = title
    if heading is None:
        heading = self.model.__class__.__name__ + ' ' + "Regression Results"

    from statsmodels.iolib.summary import Summary
    smry = Summary()
    smry.add_table_2cols(self, gleft=left_rows, gright=right_rows,
                         yname=yname, xname=xname, title=heading)
    smry.add_table_params(self, yname=yname, xname=xname, alpha=alpha,
                          use_t=self.use_t)

    # Warnings/notes, added to text format only. At most one note is
    # produced: the eigenvalue check takes priority over the condition
    # number check.
    notes = []
    smallest = eigvals[-1]
    if smallest < 1e-10:
        notes.append(
            ("The smallest eigenvalue is %6.3g. This might indicate "
             "that there are\n"
             "strong multicollinearity problems or that the design "
             "matrix is singular.") % smallest)
    elif condno > 1000:  # TODO: what is recommended
        notes.append(
            ("The condition number is large, %6.3g. This might "
             "indicate that there are\n"
             "strong multicollinearity or other numerical "
             "problems.") % condno)

    if notes:
        smry.add_extra_txt(notes)

    return smry
def summary(self, title = None, alpha = .05):
    """Summarize the Clogit Results

    Parameters
    -----------
    title : string, optional
        Title for the top table. When None, the title is the model class
        name followed by "results".
    alpha : float
        significance level for the confidence intervals

    Returns
    -------
    smry : Summary instance
        this holds the summary tables and text, which can be printed or
        converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary : class to hold summary results
    """
    model_label = self.model.__class__.__name__

    # Estimation settings and convergence information.
    left_rows = [
        ('Dep. Variable:', None),
        ('Model:', [model_label]),
        ('Method:', [self.mle_settings['optimizer']]),
        ('Date:', None),
        ('Time:', None),
        ('Converged:', ["%s" % self.mle_retvals['converged']]),
        ('Iterations:', ["%s" % self.mle_retvals['iterations']]),
        ('Elapsed time (seg.):', ["%10.4f" % self.model.elapsed_time]),
        ('Num. alternatives:', [self.model.J]),
    ]

    # Sample size and goodness-of-fit statistics.
    right_rows = [
        ('No. Cases:', [self.nobs]),
        ('No. Observations:', [self.nobs_bychoice]),
        ('Df Residuals:', [self.model.df_resid]),
        ('Df Model:', [self.model.df_model]),
        ('Log-Likelihood:', None),
        ('LL-Null:', ["%#8.5g" % self.llnull]),
        ('Pseudo R-squ.:', ["%#6.4g" % self.prsquared]),
        ('LLR p-value:', ["%#6.4g" % self.llr_pvalue]),
        ('Likelihood ratio test:', ["%#8.5g" % self.llrt]),
        ('AIC:', ["%#8.5g" % self.aic]),
    ]

    heading = title
    if heading is None:
        heading = self.model.__class__.__name__ + ' ' + "results"

    from statsmodels.iolib.summary import Summary, SimpleTable
    smry = Summary()

    # Header table.
    smry.add_table_2cols(self, gleft=left_rows, gright=right_rows,
                         title=heading)

    # One column per alternative: frequencies in the first row,
    # percentages in the second.
    row_labels = ["Frequencies of alternatives: ", "Percentage:"]
    frequencies = SimpleTable([self.freq_alt, self.perc_alt],
                              self.alt,
                              row_labels,
                              title="",
                              data_fmts=["%5.2f"])
    smry.tables.append(frequencies)

    # Parameter table.
    smry.add_table_params(self, alpha=alpha, use_t=False)

    return smry
def summary(self, regpyhdfe, yname=None, xname=None, title=None, alpha=.05):
    """
    Summarize the Regression Results.

    Before the tables are built, cluster-robust (sandwich) standard errors
    are computed by hand from ``regpyhdfe`` and printed to stdout together
    with the coefficients, t values, confidence intervals and F statistic.
    These printed values are diagnostic output only: the returned tables
    are built from the attributes of this results instance.

    Parameters
    ----------
    regpyhdfe : object
        Supplies ``data`` (2-D array whose column 0 is the response and
        whose remaining columns are the regressors), ``groups_np`` (cluster
        identifier of each observation) and ``model.df_resid``.
    yname : str, optional
        Name of endogenous (response) variable. The Default is `y`.
    xname : list[str], optional
        Names for the exogenous variables. Default is `var_##` for ## in
        the number of regressors. Must match the number of parameters
        in the model.
    title : str, optional
        Title for the top table. If not None, then this replaces the
        default title.
    alpha : float
        The significance level for the confidence intervals.

    Returns
    -------
    Summary
        Instance holding the summary tables and text, which can be printed
        or converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary : A class that holds summary results.
    """
    # Cluster-robust covariance computed by hand, following
    # https://apithymaxim.wordpress.com/2020/03/16/clustering-standard-errors-by-hand-using-python/
    # http://cameron.econ.ucdavis.edu/research/Cameron_Miller_JHR_2015_February.pdf
    X = regpyhdfe.data[:, 1:]
    y = np.expand_dims(regpyhdfe.data[:, 0], 1)

    # (X'X)^-1, the coefficient vector and the residuals
    XX_inv = np.linalg.inv(X.T.dot(X))
    beta = XX_inv.dot(X.T.dot(y))
    resid = y - X.dot(beta)

    ID = np.squeeze(regpyhdfe.groups_np)  # cluster id per observation
    c_list = np.unique(ID)
    N, k = X.shape[0], X.shape[1]
    Nclusts = int(c_list.shape[0])

    # "Meat" of the sandwich: sum over clusters of X_c' u_c u_c' X_c.
    # It is accumulated as (X_c' u_c)(X_c' u_c)' so that the n_c x n_c
    # outer product u_c u_c' never has to be materialised.
    sum_XuuTX = np.zeros((k, k))
    for cluster in c_list:
        in_cluster = (ID == cluster)
        score = X[in_cluster].T.dot(resid[in_cluster])  # shape (k, 1)
        sum_XuuTX += score.dot(score.T)

    # Degrees of freedom correction from
    # https://www.stata.com/manuals13/u20.pdf p. 54
    # TODO: actually check if the fixed effects are nested
    df_a_nested = 1
    adj = ((N - 1) / (N - df_a_nested - k)) * (Nclusts / (Nclusts - 1))

    V_beta = adj * (XX_inv.dot(sum_XuuTX).dot(XX_inv))
    se_beta = np.sqrt(np.diag(V_beta))

    print('B', beta, '\n SE: \n', se_beta)

    # ravel (not squeeze) keeps a 1-D vector even with a single regressor
    beta = beta.ravel()
    t_values = beta / se_beta
    print('T values', t_values)

    from scipy.stats import t

    # Critical values of the central (1 - alpha) interval. The confidence
    # level is passed positionally because SciPy renamed the keyword from
    # ``alpha`` to ``confidence``.
    t_interval = np.asarray(
        t.interval(1 - alpha, df=regpyhdfe.model.df_resid))
    print("t_interval", t_interval)

    # One (lower, upper) row per coefficient
    intervals = beta[:, None] + se_beta[:, None] * t_interval[None, :]
    print('intervals', intervals)

    # Wald-type F statistic: beta' V^-1 beta / k
    fvalue = beta.dot(np.linalg.solve(V_beta, beta)) / k
    print('fvalue', fvalue)

    # TODO: the cluster-robust statistics above are only printed; the
    # tables below still use the values stored on this results instance.
    # TODO: residual diagnostics (omnibus, Jarque-Bera, Durbin-Watson) and
    # the eigenvalue / condition number warnings are not included yet.

    # TODO: requiring list/iterable is a bit annoying
    #   need more control over formatting
    # TODO: default do not work if it's not identically spelled
    top_left = [
        ('Dep. Variable:', None),
        ('Model:', None),
        ('Method:', ['Least Squares']),
        ('Date:', None),
        ('Time:', None),
        ('No. Observations:', None),
        ('Df Residuals:', None),
        ('Df Model:', None),
    ]

    if hasattr(self, 'cov_type'):
        top_left.append(('Covariance Type:', [self.cov_type]))

    rsquared_type = '' if self.k_constant else ' (uncentered)'
    top_right = [
        ('R-squared' + rsquared_type + ':', ["%#8.3f" % self.rsquared]),
        ('Adj. R-squared' + rsquared_type + ':',
         ["%#8.3f" % self.rsquared_adj]),
        ('F-statistic:', ["%#8.4g" % self.fvalue]),
        ('Prob (F-statistic):', ["%#6.3g" % self.f_pvalue]),
    ]

    if title is None:
        title = self.model.__class__.__name__ + ' ' + "Regression Results"

    # create summary table instance
    from statsmodels.iolib.summary import Summary
    smry = Summary()
    smry.add_table_2cols(self, gleft=top_left, gright=top_right,
                         yname=yname, xname=xname, title=title)
    smry.add_table_params(self, yname=yname, xname=xname, alpha=alpha,
                          use_t=self.use_t)

    # add warnings/notes, added to text format only
    etext = []
    if not self.k_constant:
        etext.append("R² is computed without centering (uncentered) since the "
                     "model does not contain a constant.")
    if hasattr(self, 'cov_type'):
        etext.append(self.cov_kwds['description'])
    if self.model.exog.shape[0] < self.model.exog.shape[1]:
        etext.append(
            "The input rank is higher than the number of observations.")

    if etext:
        # number the notes and put a heading in front of them
        etext = [
            "[{0}] {1}".format(i, text)
            for i, text in enumerate(etext, start=1)
        ]
        etext.insert(0, "Notes:")
        smry.add_extra_txt(etext)

    return smry
def summary(self) -> Summary:
    """
    Build a summary of the fitted model.

    Returns
    -------
    Summary
        Contains a header table describing the fit and a table of the
        parameter estimates. It can be printed or converted to various
        output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary
    """
    model = self.model
    endog = model.endog_orig

    fit_method = "MLE" if self._use_mle else "OLS/SES"

    # Describe the sample by its first/last date when the data carry a
    # date-like index, otherwise by position.
    index = getattr(endog, "index", None)
    has_dates = isinstance(endog, pd.Series) and isinstance(
        index, (pd.DatetimeIndex, pd.PeriodIndex)
    )
    if has_dates:
        sample_start = index[0].strftime("%m-%d-%Y")
        sample_end = "- " + index[-1].strftime("%m-%d-%Y")
    else:
        sample_start = str(0)
        sample_end = str(endog.shape[0])

    dep_name = getattr(endog, "name", "endog") or "endog"

    left_rows = [
        ("Dep. Variable:", [dep_name]),
        ("Method:", [fit_method]),
        ("Date:", None),
        ("Time:", None),
        ("Sample:", [sample_start]),
        ("", [sample_end]),
    ]

    if model.method.startswith("mul"):
        deseason_method = "Multiplicative"
    else:
        deseason_method = "Additive"

    right_rows = [
        ("No. Observations:", [str(self._nobs)]),
        ("Deseasonalized:", [str(model.deseasonalize)]),
    ]
    # Either branch adds four rows, so the right column always has six.
    if not model.deseasonalize:
        right_rows.extend([("", [""])] * 4)
    else:
        right_rows.extend(
            [
                ("Deseas. Method:", [deseason_method]),
                ("Period:", [str(model.period)]),
                ("", [""]),
                ("", [""]),
            ]
        )

    smry = Summary()
    smry.add_table_2cols(
        self,
        gleft=left_rows,
        gright=right_rows,
        title=type(model).__name__ + " Results",
    )

    # Parameter table: one row per parameter, stubbed with its name.
    estimates = np.asarray(self.params)[:, np.newaxis]
    param_table = SimpleTable(
        estimates,
        ["Parameters", "Estimate"],
        list(self.params.index),
        title="Parameter Estimates",
        txt_fmt={"data_fmts": ["%s", "%#0.4g"], "data_aligns": "r"},
    )
    smry.tables.append(param_table)

    return smry
def summary(self, alpha=.05):
    """
    Build the summary of the marginal effects.

    Parameters
    ----------
    alpha : float
        Number between 0 and 1. The confidence intervals have the
        probability 1-alpha.

    Returns
    -------
    smry : Summary
        Holds a header table followed by the table of marginal effects
        (one stacked section per equation when the model has ``J > 1``).
    """
    options = self.margeff_options
    _check_at_is_all(options)

    results = self.results
    model = results.model
    title = model.__class__.__name__ + " Marginal Effects"
    method = options['method']
    header_left = [
        ('Dep. Variable:', [model.endog_names]),
        ('Method:', [method]),
        ('At:', [options['at']]),
    ]

    from statsmodels.iolib.summary import (Summary, summary_params,
                                           table_extend)

    # Work on a copy: the constant column, if any, is dropped from the names.
    exog_names = model.exog_names[:]
    smry = Summary()

    # sigh, we really need to hold on to this in _data...
    _, const_idx = _get_const_index(model.exog)
    if const_idx is not None:
        del exog_names[const_idx[0]]

    n_eq = int(getattr(model, "J", 1))
    multi_eq = n_eq > 1
    if multi_eq:
        yname, yname_list = results._get_endog_name(model.endog_names,
                                                    None, all=True)
    else:
        yname = model.endog_names
        yname_list = [yname]

    smry.add_table_2cols(self, gleft=header_left, gright=[],
                         yname=yname, xname=exog_names, title=title)

    def _header_row():
        # Column labels; the coefficient column is named after the method.
        return ['', _transform_names[method], 'std err', 'z', 'P>|z|',
                '[' + str(alpha / 2), str(1 - alpha / 2) + ']']

    # NOTE: add_table_params is not general enough yet for margeff
    # could use a refactor with getattr instead of hard-coded params
    # tvalues etc.
    conf_int = self.conf_int(alpha)
    margeff = self.margeff
    margeff_se = self.margeff_se
    tvalues = self.tvalues
    pvalues = self.pvalues

    if not multi_eq:
        stats = (results, margeff, margeff_se, tvalues, pvalues, conf_int)
        table = summary_params(stats, yname=yname, xname=exog_names,
                               alpha=alpha, use_t=False, skip_header=True)
        table.insert_header_row(0, _header_row())
    else:
        # One sub-table per equation, each titled with its endog name,
        # stacked into a single table afterwards.
        sections = []
        for eq in range(n_eq):
            eq_name = yname_list[eq]
            stats = (results, margeff[:, eq], margeff_se[:, eq],
                     tvalues[:, eq], pvalues[:, eq], conf_int[:, :, eq])
            section = summary_params(stats, yname=eq_name,
                                     xname=exog_names, alpha=alpha,
                                     use_t=False, skip_header=True)
            section.title = eq_name
            section.insert_header_row(0, _header_row())
            sections.append(section)
        table = table_extend(sections, keep_headers=True)

    smry.tables.append(table)
    return smry
def summary(self, yname=None, xname=None, title=None, alpha=.05):
    """Build the printable summary of the regression results.

    Parameters
    ----------
    yname : string, optional
        Default is `y`
    xname : list of strings, optional
        Default is `var_##` for ## in p the number of regressors
    title : string, optional
        Heading for the top table. When None, the model class name
        followed by "Regression Results" is used.
    alpha : float
        significance level for the confidence intervals

    Returns
    -------
    smry : Summary instance
        Holds the header table, the parameter table and, when the design
        matrix looks ill-conditioned, a warning note. It can be printed or
        converted to various output formats.

    See Also
    --------
    statsmodels.iolib.summary.Summary : class to hold summary results
    """
    eigvals = self.eigenvals
    condno = self.condition_number

    # Entries whose value is None are filled in by the Summary machinery.
    header_left = [
        ('Dep. Variable:', None),
        ('Model:', None),
        ('Method:', ['Least Squares']),
        ('Date:', None),
        ('Time:', None),
    ]
    header_right = [
        ('Pseudo R-squared:', ["%#8.4g" % self.prsquared]),
        ('Bandwidth:', ["%#8.4g" % self.bandwidth]),
        ('Sparsity:', ["%#8.4g" % self.sparsity]),
        ('No. Observations:', None),
        ('Df Residuals:', None),
        ('Df Model:', None),
    ]

    if title is None:
        title = self.model.__class__.__name__ + ' ' + "Regression Results"

    from statsmodels.iolib.summary import Summary

    smry = Summary()
    smry.add_table_2cols(self, gleft=header_left, gright=header_right,
                         yname=yname, xname=xname, title=title)
    smry.add_table_params(self, yname=yname, xname=xname, alpha=alpha,
                          use_t=self.use_t)

    # Warnings/notes, added to text format only. At most one is emitted:
    # the eigenvalue check takes precedence over the condition number.
    notes = []
    smallest_eigval = eigvals[-1]
    if smallest_eigval < 1e-10:
        template = (
            "The smallest eigenvalue is %6.3g. This might indicate "
            "that there are\n"
            "strong multicollinearity problems or that the design "
            "matrix is singular."
        )
        notes.append(template % smallest_eigval)
    elif condno > 1000:  # TODO: what is recommended
        template = (
            "The condition number is large, %6.3g. This might "
            "indicate that there are\n"
            "strong multicollinearity or other numerical "
            "problems."
        )
        notes.append(template % condno)

    if notes:
        smry.add_extra_txt(notes)

    return smry