def test_model_poisson_regressor(self):
    """Check ONNX conversion of PoissonRegressor for float32 and float64 inputs."""
    X, y = make_regression(n_features=5, n_samples=100, n_targets=1,
                           random_state=42, n_informative=3)
    # Poisson regression needs strictly positive targets; rescale into (0, 1].
    y = numpy.abs(y)
    y = y / y.max() + 1e-5
    model = linear_model.PoissonRegressor().fit(X, y)

    # float32 conversion and round-trip check.
    onnx_f32 = convert_sklearn(
        model, "linear regression",
        [("input", FloatTensorType([None, X.shape[1]]))],
        target_opset=TARGET_OPSET)
    self.check_model(onnx_f32, X.astype(numpy.float32))
    dump_data_and_model(X.astype(numpy.float32), model, onnx_f32,
                        basename="SklearnPoissonRegressor-Dec4")

    # float64 conversion.
    onnx_f64 = convert_sklearn(
        model, "linear regression",
        [("input", DoubleTensorType([None, X.shape[1]]))],
        target_opset=TARGET_OPSET)
    dump_data_and_model(X.astype(numpy.float64), model, onnx_f64,
                        basename="SklearnPoissonRegressor64")
def fit_glm(self, tol=1e-12, pretest=False):  # added on D5
    """Fit an unpenalized Poisson GLM on the weighted design matrix.

    Returns the coefficient vector split into its leading part and the
    trailing ``nbk`` entries.
    """
    # Rows of the constraint matrix are scaled by 1 / d_a before fitting.
    design = spr.diags(1 / self.d_a) @ self.C()
    glm = linear_model.PoissonRegressor(fit_intercept=False, tol=tol,
                                        verbose=3, alpha=0)
    glm.fit(design, self.μhat_a, sample_weight=self.d_a)
    return glm.coef_[:-self.nbk], glm.coef_[-self.nbk:]
def solveGLM(self, σ, tol=1e-9):  # added on D3
    """Solve the problem by recasting it as an unpenalized Poisson GLM.

    Returns (μ_x_y, u_x, v_y, valobs, valtot, None, elapsed, 'GLM').
    """
    start = time()
    # Product-of-marginals estimate of the joint, flattened over (x, y).
    muhat_a = (self.n_x.reshape((self.nbx, -1))
               @ self.m_y.reshape((-1, self.nby))).flatten() / self.n_x.sum()
    glm = linear_model.PoissonRegressor(fit_intercept=False, tol=tol,
                                        verbose=3, alpha=0)
    glm.fit(-self.M_z_a().T,
            muhat_a * np.exp(-self.Φ_a / σ),
            sample_weight=np.exp(self.Φ_a / σ))
    p = σ * glm.coef_
    # Pin the gauge by shifting both potential vectors with p[0].
    u_x = p[:self.nbx] - p[0]
    v_y = p[self.nbx:] + p[0]
    μ_x_y = np.exp((self.Φ_a.reshape((self.nbx, -1))
                    - u_x.reshape((-1, 1))
                    - v_y.reshape((1, -1))) / σ)
    valobs = self.Φ_a.dot(μ_x_y.flatten())
    valtot = valobs - σ * sum_xlogx(μ_x_y)
    return μ_x_y, u_x, v_y, valobs, valtot, None, time() - start, 'GLM'
def generate_model(pred_vars, log_transform=True, one_hot_week=False, method="lm"):
    """Generate the preprocessing + regression pipeline.

    Parameters
    ----------
    pred_vars : list of str
        Names of the numeric predictor columns.
    log_transform : bool
        Apply ``np.log`` to the numeric predictors when True.
    one_hot_week : bool
        One-hot encode ``week_num``; otherwise scale ``pred_vars + ['ca_prop']``.
    method : {'lm', 'poisson'}
        Which regressor to attach to the pipeline.

    Returns
    -------
    sklearn.pipeline.Pipeline

    Raises
    ------
    ValueError
        If ``method`` is not ``'lm'`` or ``'poisson'``.
    """
    # Validate with a real exception: `assert` is stripped when Python runs
    # with -O, which would let an invalid method fall through silently.
    if method not in ('lm', 'poisson'):
        raise ValueError("method must be one of 'lm' or 'poisson'")

    if log_transform:
        ft = preprocessing.FunctionTransformer(np.log)
    else:
        ft = preprocessing.FunctionTransformer()

    if one_hot_week:
        model_prep = compose.ColumnTransformer(
            [("onehot_categorical", preprocessing.OneHotEncoder(), ["week_num"]),
             ("num_scaler", ft, pred_vars)],
            remainder="drop",
        )
    else:
        model_prep = compose.ColumnTransformer(
            [("num_scaler", ft, pred_vars + ['ca_prop'])],
            remainder="drop",
        )

    if method == 'lm':
        pipe = pipeline.Pipeline([("preprocessor", model_prep),
                                  ("regressor", linear_model.LinearRegression())])
    else:  # method == 'poisson', guaranteed by the check above
        # Tiny alpha keeps the solver stable without materially penalizing.
        pipe = pipeline.Pipeline([
            ("preprocessor", model_prep),
            ("regressor", linear_model.PoissonRegressor(alpha=1e-12,
                                                        max_iter=10000))
        ])
    return pipe
regression(linear_model.ElasticNetCV(random_state=RANDOM_SEED)), regression(linear_model.GammaRegressor()), regression(linear_model.HuberRegressor()), regression(linear_model.Lars()), regression(linear_model.LarsCV()), regression(linear_model.Lasso(random_state=RANDOM_SEED)), regression(linear_model.LassoCV(random_state=RANDOM_SEED)), regression(linear_model.LassoLars()), regression(linear_model.LassoLarsCV()), regression(linear_model.LassoLarsIC()), regression(linear_model.LinearRegression()), regression(linear_model.OrthogonalMatchingPursuit()), regression(linear_model.OrthogonalMatchingPursuitCV()), regression( linear_model.PassiveAggressiveRegressor(random_state=RANDOM_SEED)), regression(linear_model.PoissonRegressor()), regression( linear_model.RANSACRegressor( base_estimator=tree.ExtraTreeRegressor(**TREE_PARAMS), random_state=RANDOM_SEED)), regression(linear_model.Ridge(random_state=RANDOM_SEED)), regression(linear_model.RidgeCV()), regression(linear_model.SGDRegressor(random_state=RANDOM_SEED)), regression(linear_model.TheilSenRegressor(random_state=RANDOM_SEED)), regression(linear_model.TweedieRegressor(power=0.0)), regression(linear_model.TweedieRegressor(power=1.0)), regression(linear_model.TweedieRegressor(power=1.5)), regression(linear_model.TweedieRegressor(power=2.0)), regression(linear_model.TweedieRegressor(power=3.0)), # Statsmodels Linear Regression
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.model_selection import train_test_split

from helper import prepare_data

# Load the prepared dataset; predict the Berri1 counter from calendar
# and weather features.
df = prepare_data()
y = df["Berri1"]
feature_cols = [
    "day", "month", "day_of_week", "Mean Temp (°C)", "Total Precip (mm)",
    "Snow on Grnd (cm)", "Min Temp (°C)", "Max Temp (°C)"
]
X = df[feature_cols]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Counts → Poisson GLM.
clf = linear_model.PoissonRegressor(max_iter=200)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))

predicted = clf.predict(X)
index = list(y.index)
plt.plot(index, y, label="true")
plt.plot(index, predicted, label="predicted")
plt.legend()
plt.show()
# The coefficients
# print('Coefficients: \n', regr.coef_)
# The mean squared error
# NOTE: the two prints below were only half commented-out, leaving an
# unclosed '(' that made the file a SyntaxError; they are now fully disabled.
# print('Mean squared error: %.2f' % mean_squared_error(y_test, y_pred))
# The coefficient of determination: 1 is perfect prediction
# print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred))

# Baseline model (regr, defined above), 5-fold CV on R^2.
scores_length_no_reg = cross_val_score(regr, X_train_std, y_train, cv=5,
                                       scoring='r2')
regr.fit(X_train_std, y_train)
# The mean score and the standard deviation are hence given by:
print("%0.2f (alpha = 1.0) accuracy with a standard deviation of %0.2f"
      % (scores_length_no_reg.mean(), scores_length_no_reg.std()))

# Alpha = 0.01 (earlier comment said 0.1, which did not match the code)
regr_l1_1 = linear_model.PoissonRegressor(alpha=0.01)
scores_length_l1_1_reg = cross_val_score(regr_l1_1, X_train_std, y_train,
                                         cv=5, scoring='r2')
regr_l1_1.fit(X_train_std, y_train)
# The mean score and the standard deviation are hence given by:
print("%0.2f (alpha = 0.01) accuracy with a standard deviation of %0.2f"
      % (scores_length_l1_1_reg.mean(), scores_length_l1_1_reg.std()))

# Alpha = 30
regr_l1_30 = linear_model.PoissonRegressor(alpha=30)
scores_length_l1_30_reg = cross_val_score(regr_l1_30, X_train_std, y_train,
                                          cv=5, scoring='r2')
regr_l1_30.fit(X_train_std, y_train)
# The mean score and the standard deviation are hence given by:
print("%0.2f (alpha = 30) accuracy with a standard deviation of %0.2f"
      % (scores_length_l1_30_reg.mean(), scores_length_l1_30_reg.std()))

# Alpha = 0.001 (earlier comment said 100, which did not match the code)
regr_l1_100 = linear_model.PoissonRegressor(alpha=0.001)
scores_length_l1_100_reg = cross_val_score(regr_l1_100, X_train_std, y_train,
                                           cv=5, scoring='r2')
def _init_model(self):
    """Build the Poisson GLM used by this object.

    ``warm_start=True`` lets repeated fits continue from the previous
    solution; a small ``alpha`` adds light L2 regularization.
    """
    params = {"alpha": 1e-4, "warm_start": True}
    return linear_model.PoissonRegressor(**params)
plt.scatter(X, y, color='blue', alpha=0.5, label='ToyData') #plt.legend() plt.show() df = pd.DataFrame([X, y]).T.rename({0: 'x', 1: 'y'}, axis=1) # df['x_round']=df['x'].apply(lambda x:np.round(x,1)) # for i in range(x_min, x_max): # plt.hist( df[df['x_round']==i]['y']) # plt.title(np.exp(w[0]+w[1]*i)) # plt.show() lr = linear_model.LinearRegression() df['logy'] = np.log(df['y']) lr.fit(df[['x']], df['logy']) print([lr.intercept_, lr.coef_[0]]) pr = linear_model.PoissonRegressor(alpha=0, fit_intercept=True, max_iter=300) pr.fit(df[['x']], df['y']) print([pr.intercept_, pr.coef_[0]]) plt.scatter(X, y, color='blue', alpha=0.5) plt.plot(x, lam, color='red', label='True') plt.plot(x, lam + np.sqrt(lam), color='red', label='True+sd', linestyle='dashed') plt.plot(x, lam - np.sqrt(lam), color='red', label='True-sd', linestyle='dashed')