def train(params=[10**-6, 10**-6, 10**-6, 10**-6]):
    global data_products, model_products
    alpha1, alpha2, lambda1, lambda2 = params
    for product_id in data_products:
        data = data_products[product_id].dropna()
        if len(data.index) <= 0:
            continue  # skip products with no usable rows instead of aborting the whole loop
        X = data[[
            'amount_of_all_competitors',
            'average_price_on_market',
            'distance_to_cheapest_competitor',
            'price_rank',
            'quality_rank'
        ]]
        y = data['sold'].copy()
        y[y > 1] = 1  # clip sales counts to a binary sold/not-sold target
        # n_iter/normalize require an older scikit-learn (normalize was removed in 1.2)
        model = BayesianRidge(n_iter=1000, tol=0.0001, normalize=True)
        model.set_params(alpha_1=alpha1, alpha_2=alpha2,
                         lambda_1=lambda1, lambda_2=lambda2)
        model.fit(X, y)
        model_products[product_id] = model
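# Usage sketch (hypothetical data, not from the original project): train()
# expects the global `data_products` to map product ids to DataFrames holding
# the five feature columns plus 'sold'. The values below are arbitrary.
import numpy as np
import pandas as pd
from sklearn.linear_model import BayesianRidge

data_products, model_products = {}, {}
rng = np.random.RandomState(0)
data_products[1] = pd.DataFrame({
    'amount_of_all_competitors': rng.randint(1, 10, 50),
    'average_price_on_market': rng.uniform(5, 15, 50),
    'distance_to_cheapest_competitor': rng.uniform(0, 5, 50),
    'price_rank': rng.randint(1, 6, 50),
    'quality_rank': rng.randint(1, 6, 50),
    'sold': rng.randint(0, 3, 50),
})
train()  # fits one BayesianRidge per product with the default hyperpriors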
def bayes_regr(X, Y):
    regr = BayesianRidge(compute_score=True)
    regr.set_params(alpha_1=10, lambda_1=1e-3)
    regr.fit(X, Y)
    w_hat = regr.coef_
    # Y_pred = regr.predict(X)
    return w_hat
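# A minimal sketch of calling bayes_regr on synthetic data; the true weights
# w = [1, 2] and the noise scale are arbitrary choices for illustration.
import numpy as np
from sklearn.linear_model import BayesianRidge

rng = np.random.RandomState(42)
X = rng.randn(100, 2)
Y = X @ np.array([1.0, 2.0]) + 0.1 * rng.randn(100)
w_hat = bayes_regr(X, Y)  # posterior mean of the weights, shape (2,)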
def plot_bayesian_ridge_curvefit():
    def func(x):
        return np.sin(2 * np.pi * x)

    # #########################################################################
    # Generate sinusoidal data with noise
    size = 25
    rng = np.random.RandomState(1234)
    x_train = rng.uniform(0., 1., size)
    y_train = func(x_train) + rng.normal(scale=0.1, size=size)
    x_test = np.linspace(0., 1., 100)

    # #########################################################################
    # Fit by cubic polynomial
    n_order = 3
    X_train = np.vander(x_train, n_order + 1, increasing=True)
    X_test = np.vander(x_test, n_order + 1, increasing=True)

    # #########################################################################
    # Plot the true and predicted curves with log marginal likelihood (L)
    reg = BayesianRidge(tol=1e-6, fit_intercept=False, compute_score=True)
    fig, axes = plt.subplots(1, 2, figsize=(8, 4))
    for i, ax in enumerate(axes):
        # Bayesian ridge regression with different initial value pairs
        if i == 0:
            init = [1 / np.var(y_train), 1.]  # Default values
        elif i == 1:
            init = [1., 1e-3]
        reg.set_params(alpha_init=init[0], lambda_init=init[1])
        reg.fit(X_train, y_train)
        ymean, ystd = reg.predict(X_test, return_std=True)

        ax.plot(x_test, func(x_test), color="blue", label="sin($2\\pi x$)")
        ax.scatter(x_train, y_train, s=50, alpha=0.5, label="observation")
        ax.plot(x_test, ymean, color="red", label="predict mean")
        ax.fill_between(x_test, ymean - ystd, ymean + ystd,
                        color="pink", alpha=0.5, label="predict std")
        ax.set_ylim(-1.3, 1.3)
        ax.legend()
        title = "$\\alpha$_init$={:.2f},\\ \\lambda$_init$={}$".format(
            init[0], init[1])
        if i == 0:
            title += " (Default)"
        ax.set_title(title, fontsize=12)
        text = "$\\alpha={:.1f}$\n$\\lambda={:.3f}$\n$L={:.1f}$".format(
            reg.alpha_, reg.lambda_, reg.scores_[-1])
        ax.text(0.05, -1.0, text, fontsize=12)
    plt.tight_layout()
    plt.show()
class BayesianLinearRegression:
    def __init__(self, alphaInit=1., lambdaInit=0.2):
        self.alphaInit = alphaInit
        self.lambdaInit = lambdaInit
        self.clf = BayesianRidge(fit_intercept=False)
        self.clf.set_params(alpha_init=self.alphaInit,
                            lambda_init=self.lambdaInit)

    def fit(self, X_train, y_train):
        self.clf.fit(X_train, y_train)

    def predict(self, X_test):
        return self.clf.predict(X_test.astype(np.float32), return_std=True)
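# A minimal usage sketch on synthetic data (the cubic Vandermonde basis and the
# sine target mirror the curve-fitting example above; all values are arbitrary).
# Since fit_intercept=False, the constant column from np.vander acts as the bias.
import numpy as np
from sklearn.linear_model import BayesianRidge

rng = np.random.RandomState(0)
x = rng.uniform(0., 1., 30)
X = np.vander(x, 4, increasing=True)  # cubic polynomial features incl. constant
y = np.sin(2 * np.pi * x) + 0.1 * rng.randn(30)

blr = BayesianLinearRegression(alphaInit=1., lambdaInit=0.2)
blr.fit(X, y)
mean, std = blr.predict(X)  # predictive mean and one-sigma band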
def find_best_bayesian_ridge(X_train, y_train):
    reg = BayesianRidge(compute_score=True, tol=1e-5)
    parameters = {
        'alpha_init': (0.2, 0.5, 1, 1.5),
        'lambda_init': [1e-3, 1e-4, 1e-5, 1e-6]
    }
    srch = GridSearchCV(reg, parameters)
    srch.fit(X_train, y_train)
    # best_params_ holds the winning grid combination; get_params() would only
    # return the unfitted template estimator's defaults
    params = srch.best_params_
    reg.set_params(alpha_init=params["alpha_init"],
                   lambda_init=params["lambda_init"])
    reg.fit(X_train, y_train)
    return reg, params
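# Hypothetical call on synthetic data (weights and noise scale are arbitrary);
# GridSearchCV's default 5-fold CV picks the alpha_init/lambda_init pair and
# the model is then refit with it on the full training set.
import numpy as np
from sklearn.linear_model import BayesianRidge
from sklearn.model_selection import GridSearchCV

rng = np.random.RandomState(0)
X_train = rng.randn(80, 3)
y_train = X_train @ np.array([0.5, -1.0, 2.0]) + 0.1 * rng.randn(80)

reg, best_params = find_best_bayesian_ridge(X_train, y_train)
print(best_params)  # e.g. {'alpha_init': 0.2, 'lambda_init': 0.001}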
def trainModels(self, dates: np.ndarray, scaler: MinMaxScaler):
    # First, split the data
    x_train, x_test, y_train, y_test = train_test_split(
        scaler.transform(dates.reshape(-1, 1)),
        self.data_scaler.transform(self.tendency.values.reshape(-1, 1)),
        test_size=0.2)
    x_train = np.vander(x_train.reshape(-1), self.n + 1, increasing=True)
    x_test = np.vander(x_test.reshape(-1), self.n + 1, increasing=True)

    # Bayesian Ridge
    bayRidge = BayesianRidge(tol=1e-6, compute_score=True)
    bayRidge.set_params(alpha_init=1, lambda_init=0.001)
    bayRidge.fit(x_train, y_train.reshape(-1))
    bayScore = bayRidge.score(x_test, y_test.reshape(-1))

    # SVR
    svr_rbf = SVR(kernel='rbf', C=1, gamma=0.25)
    svr_rbf.fit(x_train, y_train.reshape(-1))
    svr_score = svr_rbf.score(x_test, y_test.reshape(-1))

    return bayRidge, svr_rbf, np.array([bayScore, svr_score])
def cross_validate(params):
    global test_data_products, model_products
    alpha1, alpha2, lambda1, lambda2 = params
    data = test_data_products[1].dropna()
    if len(data.index) <= 0:
        return 0
    X = data[[
        'amount_of_all_competitors',
        'average_price_on_market',
        'distance_to_cheapest_competitor',
        'price_rank',
        'quality_rank'
    ]]
    y = data['sold'].copy()
    y[y > 1] = 1
    model = BayesianRidge(n_iter=1000, tol=0.0001, normalize=True)
    model.set_params(alpha_1=alpha1, alpha_2=alpha2,
                     lambda_1=lambda1, lambda_2=lambda2)
    # negate the mean R^2 so the value can be minimized by an optimizer
    score = -np.mean(cross_val_score(model, X, y, cv=3, scoring='r2'))
    return score
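# The negated R^2 suggests this function is meant as an objective for a
# minimizer; a sketch with scipy.optimize.minimize follows. The optimizer
# choice (Nelder-Mead) and starting point are assumptions, not taken from the
# original project, and `test_data_products` must already be populated.
from scipy.optimize import minimize

x0 = [1e-6, 1e-6, 1e-6, 1e-6]  # alpha_1, alpha_2, lambda_1, lambda_2
result = minimize(cross_validate, x0, method='Nelder-Mead')
train(list(result.x))  # refit the per-product models with the tuned hyperpriors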
X_test = np.vander(x_test, n_order + 1, increasing=True)

reg = BayesianRidge(tol=1e-6, fit_intercept=False, compute_score=True)

# %%
# Plot the true and predicted curves with log marginal likelihood (L)
# -------------------------------------------------------------------
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 2, figsize=(8, 4))
for i, ax in enumerate(axes):
    # Bayesian ridge regression with different initial value pairs
    if i == 0:
        init = [1 / np.var(y_train), 1.0]  # Default values
    elif i == 1:
        init = [1.0, 1e-3]
    reg.set_params(alpha_init=init[0], lambda_init=init[1])
    reg.fit(X_train, y_train)
    ymean, ystd = reg.predict(X_test, return_std=True)

    ax.plot(x_test, func(x_test), color="blue", label="sin($2\\pi x$)")
    ax.scatter(x_train, y_train, s=50, alpha=0.5, label="observation")
    ax.plot(x_test, ymean, color="red", label="predict mean")
    ax.fill_between(x_test, ymean - ystd, ymean + ystd,
                    color="pink", alpha=0.5, label="predict std")
    ax.set_ylim(-1.3, 1.3)
    ax.legend()
    title = "$\\alpha$_init$={:.2f},\\ \\lambda$_init$={}$".format(
        init[0], init[1])
        break  # tail of an elided loop in the original script

X_train.shape, X_test_scaled.shape  # quick shape check (a no-op outside a notebook)

y_mlp = best_model.predict(X_test_scaled)

reg = BayesianRidge(compute_score=True, tol=1e-5)
parameters = {'alpha_init': (0.2, 0.5, 1, 1.5),
              'lambda_init': [1e-3, 1e-4, 1e-5, 1e-6]}
srch = GridSearchCV(reg, parameters)
srch.fit(X_train, y_train)
# use the best grid combination; get_params() would only return the
# unfitted template estimator's defaults
params = srch.best_params_
reg.set_params(alpha_init=params["alpha_init"],
               lambda_init=params["lambda_init"])
reg.fit(X_train, y_train)
ymean, ystd = reg.predict(X_test, return_std=True)

# exit()
folder = "./models/seirhcd/{}".format(current_dataset_date)
os.makedirs(folder, exist_ok=True)
joblib.dump(best_model, '{}/mlp.save'.format(folder))
joblib.dump(scaler, "{}/scaler.save".format(folder))
with open('{}/metrics.json'.format(folder), 'w') as fp:
    json.dump({"perf": reports,
               "std_test": ystd.tolist(),  # ystd is a NumPy array; .values would raise
               "columns": columns,
               "countries": list(all_countries)}, fp)
merged.to_csv('{}/features.csv'.format(folder))
# Fit by cubic polynomial
n_order = 3
X_train = np.vander(x_train, n_order + 1, increasing=True)
X_test = np.vander(x_test, n_order + 1, increasing=True)

# #############################################################################
# Plot the true and predicted curves with log marginal likelihood (L)
reg = BayesianRidge(tol=1e-6, fit_intercept=False, compute_score=True)
fig, axes = plt.subplots(1, 2, figsize=(8, 4))
for i, ax in enumerate(axes):
    # Bayesian ridge regression with different initial value pairs
    if i == 0:
        init = [1 / np.var(y_train), 1.]  # Default values
    elif i == 1:
        init = [1., 1e-3]
    reg.set_params(alpha_init=init[0], lambda_init=init[1])
    reg.fit(X_train, y_train)
    ymean, ystd = reg.predict(X_test, return_std=True)

    ax.plot(x_test, func(x_test), color="blue", label="sin($2\\pi x$)")
    ax.scatter(x_train, y_train, s=50, alpha=0.5, label="observation")
    ax.plot(x_test, ymean, color="red", label="predict mean")
    ax.fill_between(x_test, ymean - ystd, ymean + ystd,
                    color="pink", alpha=0.5, label="predict std")
    ax.set_ylim(-1.3, 1.3)
    ax.legend()
    title = "$\\alpha$_init$={:.2f},\\ \\lambda$_init$={}$".format(
        init[0], init[1])
    if i == 0:
        title += " (Default)"
    ax.set_title(title, fontsize=12)
def bayes_regression(params):
    clf = BayesianRidge()
    clf.set_params(**params)
    return clf
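# Minimal sketch: build an unfitted estimator with explicit hyperpriors passed
# as a dict (the specific values below are arbitrary illustrations).
from sklearn.linear_model import BayesianRidge

clf = bayes_regression({'alpha_1': 1e-6, 'alpha_2': 1e-6,
                        'lambda_1': 1e-6, 'lambda_2': 1e-6})
# clf is an unfitted BayesianRidge; fit it as usual with clf.fit(X, y)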