def summary(self, yname=None, xname=None, title='Generalized linear model',
            returns='print'):
    """
    Summarize the generalized linear model results as text or tables.

    Parameters
    ----------
    yname : string, optional
        Label shown for the dependent variable. Default is 'Y'.
    xname : list of strings, optional
        Labels for the regressors. Default is 'x0', 'x1', ..., one per
        column of ``self.model.exog``.
    title : string, optional
        Title of the header table. Default is 'Generalized linear model'.
    returns : string, optional
        Output selector. One of 'print' (default), 'text', 'tables'
        (also accepted as 'table'), 'csv', 'latex' or 'html'.

    Returns
    -------
    returns='print' : None
        The summary text is printed.
    returns='text' : string
        The summary text (header table joined with the coefficient table).
    returns='tables' or 'table' : list
        ``[table_1l, table_1r, table_2]`` as separate SimpleTable
        instances (left header, right header, coefficients).
    returns='csv' : string
        The header table and the coefficient table rendered with
        ``as_csv()`` and joined by a newline.
    returns='latex' or 'html' : None
        Not implemented yet; a notice is printed.

    Raises
    ------
    ValueError
        If `returns` is not one of the options listed above.

    Examples (needs updating)
    --------
    >>> import scikits.statsmodels.api as sm
    >>> data = sm.datasets.longley.load()
    >>> data.exog = sm.add_constant(data.exog)
    >>> ols_results = sm.OLS(data.endog, data.exog).results
    >>> print ols_results.summary()
    ...

    Notes
    -----
    Standard errors are not implemented here: the 'stand errors' column is
    filled with blanks. The confidence interval column is whatever
    ``self.conf_int()`` returns (the original author notes it is computed
    from the normal distribution; that is not visible from this method).
    """
    import time
    from iolib import SimpleTable

    valid_returns = ('print', 'text', 'table', 'tables', 'csv', 'latex',
                     'html')
    if returns not in valid_returns:
        raise ValueError("returns must be one of %s, got %r"
                         % (', '.join(valid_returns), returns))

    if yname is None:
        yname = 'Y'
    if xname is None:
        xname = ['x%d' % i for i in range(self.model.exog.shape[1])]

    # Only the results that actually appear in the tables are read from self.
    now = time.localtime()
    dist_family = self.model.family.__class__.__name__
    params = self.params
    tvalues = self.tvalues
    conf_int = self.conf_int()
    # TODO: replace the blank column once standard errors are available.
    stand_errors = [' '] * len(params)

    table_1l_fmt = dict(
        data_fmts=["%s", "%s", "%s", "%s", "%s"],
        empty_cell='',
        colwidths=17,
        colsep=' ',
        row_pre=' ',
        row_post=' ',
        table_dec_above='=',
        table_dec_below='',
        header_dec_below=None,
        header_fmt='%s',
        stub_fmt='%s',
        title_align='c',
        header_align='r',
        data_aligns="r",
        stubs_align="l",
        fmt='txt',
    )
    # Note from the original author: table_1l_fmt overrides the formatting
    # below (presumably once the two tables are joined by extend_right).
    table_1r_fmt = dict(
        # The last entry used to be "%S", which is not a valid conversion.
        data_fmts=["%s", "%s", "%s", "%s", "%s"],
        empty_cell='',
        colwidths=16,
        colsep=' ',
        row_pre='',
        row_post='',
        table_dec_above='=',
        table_dec_below='',
        header_dec_below=None,
        header_fmt='%s',
        stub_fmt='%s',
        title_align='c',
        header_align='r',
        data_aligns="r",
        stubs_align="l",
        fmt='txt',
    )
    table_2_fmt = dict(
        data_fmts=["%s", "%s", "%s", "%s"],
        empty_cell='',
        colwidths=14,
        colsep=' ',
        row_pre=' ',
        row_post=' ',
        table_dec_above='=',
        table_dec_below='=',
        header_dec_below='-',
        header_fmt='%s',
        stub_fmt='%s',
        title_align='c',
        header_align='r',
        data_aligns='r',
        stubs_align='l',
        fmt='txt',
    )

    # ---- summary table 1, left half: model description ----
    table_1l_stubs = ('Model Family:',
                      'Method:',
                      'Dependent Variable:',
                      'Date:',
                      'Time:',
                      )
    table_1l_data = [
        [dist_family],
        ['IRLS'],
        [yname],
        [time.strftime("%a, %d %b %Y", now)],
        [time.strftime("%H:%M:%S", now)],
    ]
    table_1l = SimpleTable(table_1l_data, None, table_1l_stubs,
                           title=title, txt_fmt=table_1l_fmt)

    # ---- summary table 1, right half: fit statistics ----
    table_1r_stubs = ('# of obs:',
                      'Df residuals:',
                      'Df model:',
                      'Scale:',
                      'Log likelihood:',
                      )
    table_1r_data = [
        [self.nobs],
        [self.df_resid],
        [self.df_model],
        ["%#6.4f" % (self.scale,)],
        ["%#6.4f" % (self.llf,)],
    ]
    table_1r = SimpleTable(table_1r_data, None, table_1r_stubs,
                           title=None, txt_fmt=table_1r_fmt)

    # ---- summary table 2: one row per regressor ----
    # TODO add % range to confidence interval column header
    table_2header = ('coefficient', 'stand errors', 't-statistic',
                     'Conf. Interval')
    # Built as a real list (not a lazy zip) so the rows can be traversed
    # more than once on Python 3 as well as Python 2.
    table_2data = [
        ("%#6.4f" % (params[i]),
         stand_errors[i],
         "%#6.4f" % (tvalues[i]),
         "[%#6.3f, %#6.3f]" % tuple(conf_int[i]))
        for i in range(len(xname))
    ]
    table_2 = SimpleTable(table_2data, table_2header, xname,
                          title=None, txt_fmt=table_2_fmt)

    # ---- hand back the requested representation ----
    if returns in ('table', 'tables'):
        return [table_1l, table_1r, table_2]
    if returns in ('latex', 'html'):
        print('not avalible yet')
        return None

    # Every remaining format shows the two header halves side by side.
    table_1l.extend_right(table_1r)
    if returns == 'csv':
        # The previous code referenced tables that do not exist in this
        # method (table_1, table_3L); use the ones built above instead.
        return table_1l.as_csv() + '\n' + table_2.as_csv()

    text = str(table_1l) + '\n' + str(table_2)
    if returns == 'print':
        print(text)
        return None
    return text
def summary(self, yname=None, xname=None):
    """
    Return a string that summarizes the regression results.

    Parameters
    ----------
    yname : string, optional
        Label for the dependent variable. Default is
        ``self.model.endog_names``.
    xname : list of strings, optional
        Labels for the regressors. Default is ``self.model.exog_names``.

    Returns
    -------
    string
        Three text tables joined by newlines: a model description, the
        per-coefficient table, and model/residual statistics.

    Examples
    --------
    >>> import scikits.statsmodels as sm
    >>> data = sm.datasets.longley.load()
    >>> data.exog = sm.add_constant(data.exog)
    >>> ols_results = sm.OLS(data.endog, data.exog).results
    >>> print ols_results.summary()
    ...

    Notes
    -----
    All residual statistics (Durbin-Watson, omnibus, Jarque-Bera, skew,
    kurtosis) are calculated from ``self.wresid``, which the original
    author describes as the whitened residuals.

    As a side effect the coefficient table is stored on the instance as
    ``self.summary2``.
    """
    import time
    from iolib import SimpleTable
    from stattools import jarque_bera, omni_normtest, durbin_watson

    if yname is None:
        yname = self.model.endog_names
    if xname is None:
        xname = self.model.exog_names

    modeltype = self.model.__class__.__name__
    llf, aic, bic = self.llf, self.aic, self.bic
    JB, JBpv, skew, kurtosis = jarque_bera(self.wresid)
    omni, omnipv = omni_normtest(self.wresid)
    now = time.localtime()

    part1_fmt = dict(
        data_fmts=["%s"],
        empty_cell='',
        colwidths=15,
        colsep=' ',
        row_pre='| ',
        row_post='|',
        table_dec_above='=',
        table_dec_below='',
        header_dec_below=None,
        header_fmt='%s',
        stub_fmt='%s',
        title_align='c',
        header_align='r',
        data_aligns="r",
        stubs_align="l",
        fmt='txt',
    )
    part2_fmt = dict(
        data_fmts=["%#15.4F", "%#15.4F", "%#15.4F", "%#14.4G"],
        empty_cell='',
        colsep=' ',
        row_pre='| ',
        row_post='|',
        table_dec_above='=',
        table_dec_below='=',
        header_dec_below='-',
        header_fmt='%s',
        stub_fmt='%s',
        title_align='c',
        header_align='r',
        data_aligns='r',
        stubs_align='l',
        fmt='txt',
    )
    part3_fmt = dict(
        data_fmts=["%#12.6g", "%#12.6g", "%#10.4g", "%#5.4g"],
        empty_cell='',
        colwidths=None,
        colsep=' ',
        row_pre='| ',
        row_post=' |',
        table_dec_above=None,
        table_dec_below='-',
        header_dec_below='-',
        header_fmt='%s',
        stub_fmt='%s',
        title_align='c',
        header_align='r',
        data_aligns='r',
        stubs_align='l',
        fmt='txt',
    )

    # ---- part 1: model description ----
    part1stubs = ('Dependent Variable:',
                  'Model:',
                  'Method:',
                  'Date:',
                  'Time:',
                  '# obs:',
                  'Df residuals:',
                  'Df model:')
    part1data = [[yname],
                 [modeltype],
                 ['Least Squares'],
                 [time.strftime("%a, %d %b %Y", now)],
                 [time.strftime("%H:%M:%S", now)],
                 [self.nobs],
                 [self.df_resid],
                 [self.df_model]]
    part1 = SimpleTable(part1data, None, part1stubs,
                        title='Summary of Regression Results',
                        txt_fmt=part1_fmt)

    # ---- part 2: one row per regressor ----
    # Fetch each result once; in particular self.t() used to be called
    # again for every single row.
    params = self.params
    bse = self.bse
    tvalues = self.t()
    pvalues = self.pvalues
    # A real list of row tuples (not a lazy zip) so the rows survive being
    # traversed more than once on Python 3 as well as Python 2.
    part2data = [(params[i], bse[i], tvalues[i], pvalues[i])
                 for i in range(len(xname))]
    part2header = ('coefficient', 'std. error', 't-statistic', 'prob.')
    part2 = SimpleTable(part2data, part2header, xname,
                        title=None, txt_fmt=part2_fmt)
    self.summary2 = part2

    # ---- part 3: model statistics (left) and residual statistics (right)
    part3Lstubs = ('R-squared:',
                   'Adjusted R-squared:',
                   'F-statistic:',
                   'Prob (F-statistic):',
                   'Log likelihood:',
                   'AIC criterion:',
                   'BIC criterion:',)
    part3Rstubs = ('Durbin-Watson:',
                   'Omnibus:',
                   'Prob(Omnibus):',
                   'JB:',
                   'Prob(JB):',
                   'Skew:',
                   'Kurtosis:')
    part3Ldata = [[self.rsquared], [self.rsquared_adj], [self.fvalue],
                  [self.f_pvalue], [llf], [aic], [bic]]
    part3Rdata = [[durbin_watson(self.wresid)], [omni], [omnipv], [JB],
                  [JBpv], [skew], [kurtosis]]
    part3L = SimpleTable(part3Ldata, ['Models stats'], part3Lstubs,
                         txt_fmt=part3_fmt)
    part3R = SimpleTable(part3Rdata, ['Residual stats'], part3Rstubs,
                         txt_fmt=part3_fmt)
    part3L.extend_right(part3R)

    # ---- join the parts into the final text ----
    # TODO: the output kind should be selected by an argument, e.g.
    # regression.summary(text); __str__ could then be defined to return
    # that text for printing to screen, which would take better advantage
    # of table.SimpleTable.
    return str(part1) + '\n' + str(part2) + '\n' + str(part3L)
def summary_old(self, yname=None, xname=None,
                title='Generalized linear model', returns='text'):
    """
    Summarize the generalized linear model results as text or tables.

    Parameters
    ----------
    yname : string, optional
        Label shown for the dependent variable. Default is 'Y'.
    xname : list of strings, optional
        Labels for the regressors. Default is 'x0', 'x1', ..., one per
        column of ``self.model.exog``.
    title : string, optional
        Title of the header table. Default is 'Generalized linear model'.
    returns : string, optional
        Output selector. One of 'text' (default), 'print', 'tables'
        (also accepted as 'table'), 'csv', 'latex' or 'html'.

    Returns
    -------
    returns='text' : string
        The summary text (header table joined with the coefficient table).
    returns='print' : None
        The summary text is printed.
    returns='tables' or 'table' : list
        ``[table_1l, table_1r, table_2]`` as separate SimpleTable
        instances (left header, right header, coefficients).
    returns='csv' : string
        The header table and the coefficient table rendered with
        ``as_csv()`` and joined by a newline.
    returns='latex' or 'html' : None
        Not implemented yet; a notice is printed.

    Raises
    ------
    ValueError
        If `returns` is not one of the options listed above.

    Examples (needs updating)
    --------
    >>> import scikits.statsmodels.api as sm
    >>> data = sm.datasets.longley.load()
    >>> data.exog = sm.add_constant(data.exog)
    >>> ols_results = sm.OLS(data.endog, data.exog).results
    >>> print ols_results.summary()
    ...

    Notes
    -----
    The 'stand errors' column is taken from ``self.bse``. The confidence
    interval column is whatever ``self.conf_int()`` returns (the original
    author notes it is computed from the normal distribution; that is not
    visible from this method).
    """
    import time
    from iolib import SimpleTable

    valid_returns = ('text', 'print', 'table', 'tables', 'csv', 'latex',
                     'html')
    if returns not in valid_returns:
        raise ValueError("returns must be one of %s, got %r"
                         % (', '.join(valid_returns), returns))

    # Previously yname was overwritten with 'Y' unconditionally, so the
    # caller's label was silently ignored.
    if yname is None:
        yname = 'Y'
    if xname is None:
        xname = ['x%d' % i for i in range(self.model.exog.shape[1])]

    # Only the results that actually appear in the tables are read from self.
    now = time.localtime()
    dist_family = self.model.family.__class__.__name__
    params = self.params
    stand_errors = self.bse
    tvalues = self.tvalues
    conf_int = self.conf_int()

    table_1l_fmt = dict(
        data_fmts=["%s", "%s", "%s", "%s", "%s"],
        empty_cell='',
        colwidths=15,
        colsep=' ',
        row_pre=' ',
        row_post=' ',
        table_dec_above='=',
        table_dec_below='',
        header_dec_below=None,
        header_fmt='%s',
        stub_fmt='%s',
        title_align='c',
        header_align='r',
        data_aligns="r",
        stubs_align="l",
        fmt='txt',
    )
    # Note from the original authors: table_1l_fmt overrides the formatting
    # below -- possibly inside extend_right? (JP)
    table_1r_fmt = dict(
        data_fmts=["%s", "%s", "%s", "%s", "%1s"],
        empty_cell='',
        colwidths=12,
        colsep=' ',
        row_pre='',
        row_post='',
        table_dec_above='=',
        table_dec_below='',
        header_dec_below=None,
        header_fmt='%s',
        stub_fmt='%s',
        title_align='c',
        header_align='r',
        data_aligns="r",
        stubs_align="l",
        fmt='txt',
    )
    table_2_fmt = dict(
        data_fmts=["%s", "%s", "%s", "%s"],
        empty_cell='',
        colwidths=13,
        colsep=' ',
        row_pre=' ',
        row_post=' ',
        table_dec_above='=',
        table_dec_below='=',
        header_dec_below='-',
        header_fmt='%s',
        stub_fmt='%s',
        title_align='c',
        header_align='r',
        data_aligns='r',
        stubs_align='l',
        fmt='txt',
    )

    # ---- summary table 1, left half: model description ----
    table_1l_stubs = (
        'Model Family:',
        'Method:',
        'Dependent Variable:',
        'Date:',
        'Time:',
    )
    table_1l_data = [
        [dist_family],
        ['IRLS'],
        [yname],
        [time.strftime("%a, %d %b %Y", now)],
        [time.strftime("%H:%M:%S", now)],
    ]
    table_1l = SimpleTable(table_1l_data, None, table_1l_stubs,
                           title=title, txt_fmt=table_1l_fmt)

    # ---- summary table 1, right half: fit statistics ----
    table_1r_stubs = ('# of obs:', 'Df residuals:', 'Df model:', 'Scale:',
                      'Log likelihood:')
    table_1r_data = [
        [self.nobs],
        [self.df_resid],
        [self.df_model],
        ["%#6.4f" % (self.scale, )],
        ["%#6.4f" % (self.llf, )],
    ]
    table_1r = SimpleTable(table_1r_data, None, table_1r_stubs,
                           title=None, txt_fmt=table_1r_fmt)

    # ---- summary table 2: one row per regressor ----
    # TODO add % range to confidence interval column header
    table_2header = ('coefficient', 'stand errors', 't-statistic',
                     'Conf. Interval')
    # Built as a real list (not a lazy zip) so the rows can be traversed
    # more than once on Python 3 as well as Python 2.
    table_2data = [
        ("%#6.4f" % (params[i]),
         "%#6.4f" % stand_errors[i],
         "%#6.4f" % (tvalues[i]),
         " [%#6.3f, %#6.3f]" % tuple(conf_int[i]))
        for i in range(len(xname))
    ]
    table_2 = SimpleTable(table_2data, table_2header, xname,
                          title=None, txt_fmt=table_2_fmt)

    # ---- hand back the requested representation ----
    if returns in ('table', 'tables'):
        return [table_1l, table_1r, table_2]
    if returns in ('latex', 'html'):
        print('not avalible yet')
        return None

    # Every remaining format shows the two header halves side by side.
    table_1l.extend_right(table_1r)
    if returns == 'csv':
        # The previous code referenced tables that do not exist in this
        # method (table_1, table_3L); use the ones built above instead.
        return table_1l.as_csv() + '\n' + table_2.as_csv()

    text = str(table_1l) + '\n' + str(table_2)
    if returns == 'print':
        print(text)
        return None
    return text