def print_tables(x, score):
    """Print every table in *x* as a LaTeX tabular, followed by a blank gap.

    Args:
        x: Iterable of ``(title, value, dataframe)`` triples. Only the title
            and the dataframe are used; the middle element is ignored.
        score: Passed through unchanged to ``single_tabular`` together with
            the rendered tabular and the title.
    """
    for title, _unused_value, frame in x:
        latex_body = _df_to_simpletable(
            frame, float_format="%.2f", index=False
        ).as_latex_tabular()
        print(single_tabular(latex_body, title, score))
        print('\n')
def _linear_regression_train(table, feature_cols, label_col, fit_intercept=True, is_vif=False, vif_threshold=10):
    """Fit an OLS model with statsmodels and assemble its report and model dict.

    Args:
        table: Input data; ``table[label_col]`` is the regression target and
            ``check_col_type(table, feature_cols)`` supplies the features.
        feature_cols: Feature column names, stored as ``model['features']``.
        label_col: Target column name; also used in the plot axis labels.
        fit_intercept: When equal to ``True`` a constant column is added to
            the design matrix (``has_constant='add'``) before fitting.
        is_vif: When truthy, a ``VIF`` column and a ``VIF>{threshold}`` flag
            column are appended to the coefficient summary table.
        vif_threshold: Cut-off used for the ``'true'``/``'false'`` VIF flag.

    Returns:
        ``{'model': model}`` where ``model`` carries the fit statistics, the
        three summary dataframes, the rendered report and the fitted
        statsmodels result (with its data removed via ``remove_data``).
    """
    _feature_names, design = check_col_type(table, feature_cols)
    target = table[label_col]

    # Both the intercept and no-intercept paths run the same OLS fit; only
    # the design matrix differs.
    if fit_intercept == True:  # noqa: E712 - exact comparison kept on purpose
        design = sm.add_constant(design, has_constant='add')
    ols_result = sm.OLS(target, design).fit()

    fitted = ols_result.predict(design)
    residuals = target - fitted

    ols_summary = ols_result.summary()
    summary_frames = simple_tables2df_list(ols_summary.tables, drop_index=True)
    summary0 = summary_frames[0]
    summary1 = summary_frames[1]

    # The VIF computation below needs ``.values`` / ``.shape``, so wrap the
    # design matrix when its type differs from the input table's type.
    same_container = type(design) == type(table)
    if not same_container:
        design = pd.DataFrame(design)

    if is_vif:
        def _flag(vif_value):
            return 'true' if vif_value > vif_threshold else 'false'

        vif_values = []
        for column_index in range(design.shape[1]):
            vif_values.append(
                variance_inflation_factor(design.values, column_index))
        summary1['VIF'] = vif_values
        flag_column = 'VIF>{}'.format(vif_threshold)
        summary1[flag_column] = summary1['VIF'].apply(_flag)

    # Replace the coefficient table in the statsmodels summary so the HTML
    # rendering reflects the (possibly VIF-extended) dataframe.
    ols_summary.tables[1] = _df_to_simpletable(summary1)
    summary2 = summary_frames[2]
    html_result = ols_summary.as_html()

    # Predicted vs actual, with the identity line as reference.
    plt.figure()
    plt.scatter(fitted, target)
    plt.xlabel('Predicted values for ' + label_col)
    plt.ylabel('Actual values for ' + label_col)
    lowest = np.min(fitted)
    highest = np.max(fitted)
    plt.plot([lowest, highest], [lowest, highest], 'r--')
    fig_actual_predict = plt2MD(plt)

    # Residuals against predictions.
    plt.figure()
    plt.scatter(fitted, residuals)
    plt.xlabel('Predicted values for ' + label_col)
    plt.ylabel('Residuals')
    plt.axhline(y=0, color='r', linestyle='--')
    fig_residual_1 = plt2MD(plt)

    # Q-Q plot of the residuals.
    plt.figure()
    sm.qqplot(residuals, line='s')
    plt.ylabel('Residuals')
    fig_residual_2 = plt2MD(plt)

    # Residual distribution.
    plt.figure()
    sns.distplot(residuals)
    plt.xlabel('Residuals')
    fig_residual_3 = plt2MD(plt)

    report = BrtcReprBuilder()
    report.addMD(strip_margin("""
    | ## Linear Regression Result
    | ### Summary
    |
    """))
    report.addHTML(html_result)
    diagnostics_md = """
    |
    | ### Predicted vs Actual
    | {image1}
    |
    | ### Fit Diagnostics
    | {image2}
    | {image3}
    | {image4}
    """.format(image1=fig_actual_predict,
               image2=fig_residual_1,
               image3=fig_residual_2,
               image4=fig_residual_3)
    report.addMD(strip_margin(diagnostics_md))

    model = _model_dict('linear_regression_model')
    # Insertion order mirrors the original key order.
    ordered_entries = [
        ('features', feature_cols),
        ('label', label_col),
        ('coefficients', ols_result.params),
        ('fit_intercept', fit_intercept),
        ('r2', ols_result.rsquared),
        ('adjusted_r2', ols_result.rsquared_adj),
        ('aic', ols_result.aic),
        ('bic', ols_result.bic),
        ('f_static', ols_result.fvalue),
        ('tvalues', ols_result.tvalues),
        ('pvalues', ols_result.pvalues),
        ('_repr_brtc_', report.get()),
        ('summary0', summary0),
        ('summary1', summary1),
        ('summary2', summary2),
    ]
    for key, value in ordered_entries:
        model[key] = value

    # Drop the training data from the result object before it is stored.
    ols_result.remove_data()
    model['lr_model'] = ols_result

    return {'model': model}
def _linear_regression_train(table, feature_cols, label_col, fit_intercept=True, is_vif=True, vif_threshold=10):
    """Fit a linear regression and assemble its report and model dict.

    A scikit-learn ``LinearRegression`` is fitted and stored as
    ``model['lr_model']``; a statsmodels OLS fit on the same columns supplies
    the summary tables and the fit statistics.

    Args:
        table: Input data, indexed as ``table[feature_cols]`` and
            ``table[label_col]``.
        feature_cols: Feature column names, stored as ``model['features']``.
        label_col: Target column name; also used in the plot axis labels.
        fit_intercept: Whether to fit an intercept. Passed to
            ``LinearRegression`` and, when truthy, a constant column is added
            to the statsmodels design matrix.
        is_vif: When truthy, a ``VIF`` column and a ``VIF>{threshold}`` flag
            column are appended to the coefficient summary table.
        vif_threshold: Cut-off used for the ``'true'``/``'false'`` VIF flag.

    Returns:
        ``{'model': model}`` where ``model`` carries the fit statistics, the
        scikit-learn estimator, the three summary dataframes and the
        rendered report.
    """
    features = table[feature_cols]
    label = table[label_col]

    # Pass the flag by keyword: scikit-learn's estimator constructors take
    # keyword-only parameters in current releases.
    lr_model = LinearRegression(fit_intercept=fit_intercept)
    lr_model.fit(features, label)
    predict = lr_model.predict(features)
    residual = label - predict

    # Truthiness (not ``== True``) so the statsmodels design follows the same
    # interpretation of the flag as the scikit-learn estimator above.
    if fit_intercept:
        features = sm.add_constant(features)
    lr_model_fit = sm.OLS(label, features).fit()

    summary = lr_model_fit.summary()
    summary_tables = simple_tables2df_list(summary.tables, drop_index=True)
    summary0 = summary_tables[0]
    summary1 = summary_tables[1]

    if is_vif:
        design_values = features.values
        summary1['VIF'] = [
            variance_inflation_factor(design_values, column_index)
            for column_index in range(features.shape[1])
        ]
        summary1['VIF>{}'.format(vif_threshold)] = summary1['VIF'].apply(
            lambda vif: 'true' if vif > vif_threshold else 'false')

    # Replace the coefficient table in the statsmodels summary so the HTML
    # rendering reflects the (possibly VIF-extended) dataframe.
    summary.tables[1] = _df_to_simpletable(summary1)
    summary2 = summary_tables[2]
    html_result = summary.as_html()

    plt.figure()
    plt.scatter(predict, label)
    plt.xlabel('Predicted values for ' + label_col)
    plt.ylabel('Actual values for ' + label_col)

    # Least-squares line of the actual values on the predicted values, from
    # the normal equations. Every sample contributes to every sum (the
    # previous hand-written loop skipped the last sample in sum(x * y)).
    x = np.asarray(predict)
    y = np.asarray(label)
    n_samples = x.size
    sum_x = np.sum(x)
    sum_y = np.sum(y)
    sum_xx = np.sum(x * x)
    sum_xy = np.sum(x * y)
    det = n_samples * sum_xx - sum_x * sum_x
    line_intercept = (sum_xx * sum_y - sum_x * sum_xy) / det
    line_slope = (n_samples * sum_xy - sum_x * sum_y) / det

    p1x = np.min(x)
    p2x = np.max(x)
    p1y = line_intercept + line_slope * p1x
    p2y = line_intercept + line_slope * p2x
    plt.plot([p1x, p2x], [p1y, p2y], 'r--')
    fig_actual_predict = plt2MD(plt)

    # Residuals against predictions.
    plt.figure()
    plt.scatter(predict, residual)
    plt.xlabel('Predicted values for ' + label_col)
    plt.ylabel('Residuals')
    plt.axhline(y=0, color='r', linestyle='--')
    fig_residual_1 = plt2MD(plt)

    # Q-Q plot of the residuals.
    plt.figure()
    sm.qqplot(residual, line='s')
    plt.ylabel('Residuals')
    fig_residual_2 = plt2MD(plt)

    # Residual distribution.
    plt.figure()
    sns.distplot(residual)
    plt.xlabel('Residuals')
    fig_residual_3 = plt2MD(plt)

    rb = BrtcReprBuilder()
    rb.addMD(strip_margin("""
    | ## Linear Regression Result
    | ### Summary
    |
    """))
    rb.addHTML(html_result)
    rb.addMD(strip_margin("""
    |
    | ### Predicted vs Actual
    | {image1}
    |
    | ### Fit Diagnostics
    | {image2}
    | {image3}
    | {image4}
    """.format(image1=fig_actual_predict,
               image2=fig_residual_1,
               image3=fig_residual_2,
               image4=fig_residual_3)))

    model = _model_dict('linear_regression_model')
    model['features'] = feature_cols
    model['label'] = label_col
    model['coefficients'] = lr_model_fit.params
    model['r2'] = lr_model_fit.rsquared
    model['adjusted_r2'] = lr_model_fit.rsquared_adj
    model['aic'] = lr_model_fit.aic
    model['bic'] = lr_model_fit.bic
    model['f_static'] = lr_model_fit.fvalue
    model['tvalues'] = lr_model_fit.tvalues
    model['pvalues'] = lr_model_fit.pvalues
    model['lr_model'] = lr_model
    model['_repr_brtc_'] = rb.get()
    model['summary0'] = summary0
    model['summary1'] = summary1
    model['summary2'] = summary2

    return {'model': model}