def ElasticNet(X, y, alphas=np.arange(0, 1.1, 0.1), cv=5, sample_weight=None, standardize=False, random_state=None, fit_intercept=False):
    """
    Elastic Net with cross-validation for optimal alpha and lambda.

    One cross-validated ``glmnet.ElasticNet`` is fitted for every mixing
    value in ``alphas``. The fitted model whose largest entry of
    ``cv_mean_score_`` is the highest among all candidates is selected and
    its ``coef_`` is returned.

    Parameters
    ----------
    X, y : training data, forwarded unchanged to ``fit``.
    alphas : iterable of mixing values to try.
    cv : forwarded to glmnet as ``n_splits``.
    sample_weight : forwarded to ``fit``.
    standardize, fit_intercept, random_state : forwarded to the estimator.

    Returns
    -------
    The ``coef_`` attribute of the selected model.
    """
    fitted_models = []
    best_scores = []
    for mixing in alphas:
        candidate = glmnet.ElasticNet(
            alpha=mixing,
            standardize=standardize,
            fit_intercept=fit_intercept,
            n_splits=cv,
            scoring='mean_squared_error',
            random_state=random_state,
        )
        candidate.fit(X, y, sample_weight=sample_weight)
        # Best CV score along this candidate's lambda path.
        best_scores.append(candidate.cv_mean_score_.max())
        fitted_models.append(candidate)

    # np.argmax keeps the first candidate on ties, like the array version.
    winner = fitted_models[np.argmax(best_scores)]
    return winner.coef_
def __init__(self, alpha, n_splits, norm_x=None, norm_y=None, lambda_path=None, scoring="mean_squared_error", shuffle=True):
    '''
    Initializes the model.

    Parameters:
    -----------
    @param alpha: mixing parameter for the elastic net
    @param n_splits: forwarded to glmnet.ElasticNet as n_splits
    @param norm_x: (optional) normalizer for the inputs; util.NoNormalizer()
                   is used when None
    @param norm_y: (optional) normalizer for the targets; util.NoNormalizer()
                   is used when None
    @param lambda_path: (optional) path for regularization parameter lambda
    @param scoring: scoring name forwarded to glmnet.ElasticNet
    @param shuffle: stored as self._shuffle; not used inside this constructor
    '''
    # initialize elastic net model (standardization is always disabled here)
    self.modelNet = glmnet.ElasticNet(
        alpha=alpha,
        lambda_path=lambda_path,
        standardize=False,
        scoring=scoring,
        n_splits=n_splits,
    )
    # plain linear regression model kept alongside the elastic net
    self.modelLm = lm.LinearRegression()

    # fall back to the no-op normalizers when none are supplied
    self._norm_x = util.NoNormalizer() if norm_x is None else norm_x
    self._norm_y = util.NoNormalizer() if norm_y is None else norm_y

    # placeholders; nothing is trained at construction time
    self._norm_x_trained = None
    self._norm_y_trained = None

    self._shuffle = shuffle
def cross_validation(X_train, y_train, X_test, y_test, cv=10, lasso=True):
    """
    Fit a cross-validated lasso or ridge model and report its test RMSE.

    A ``glmnet.ElasticNet`` is fitted on the training data with ``alpha=1``
    (lasso) or ``alpha=0`` (ridge) over the reversed module-level
    ``lambdas`` sequence. The model's ``lambda_max_`` and the RMSE of its
    predictions on the test data are printed.

    Parameters
    ----------
    X_train, y_train : data passed to ``fit``.
    X_test, y_test : data used for the reported RMSE.
    cv : forwarded to glmnet as ``n_splits``.
    lasso : True selects alpha=1, False selects alpha=0.

    Returns
    -------
    None; results are printed.
    """
    alpha = 1 if lasso else 0
    penalty_name = 'lasso' if lasso else 'ridge'

    # NOTE(review): `lambdas` must exist at module level; it is reversed here,
    # presumably because glmnet expects a decreasing path -- confirm.
    model = glmnet.ElasticNet(
        alpha=alpha,
        n_splits=cv,
        standardize=False,
        lambda_path=lambdas[::-1],
    )
    model.fit(X_train, y_train)
    print(f"best lambda for {penalty_name} is {model.lambda_max_}")

    y_pred = model.predict(X_test)
    # `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6;
    # taking the root of the MSE is equivalent for a single-output target
    # and works on every scikit-learn version.
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    print(f"RMSE is {rmse}")
def AdaptiveLasso(X, y, logistic=False, sample_weight=None, adaptive_weights=None, random_state=None):
    """
    Adaptive Lasso with cross-validation for optimal lambda.

    Fits a cross-validated lasso (``alpha=1``, 5 splits, no standardization,
    no intercept) using ``glmnet.LogitNet`` scored by accuracy when
    ``logistic`` is true, otherwise ``glmnet.ElasticNet`` scored by mean
    squared error.

    Parameters
    ----------
    X, y : training data, forwarded unchanged to ``fit``.
    logistic : choose the logistic (True) or linear (False) estimator.
    sample_weight : forwarded to ``fit``.
    adaptive_weights : forwarded to ``fit`` as ``relative_penalties``.
    random_state : forwarded to the estimator so that the cross-validation
        is reproducible (previously this argument was silently ignored).

    Returns
    -------
    The fitted estimator's ``coef_``.
    """
    if logistic:
        estimator_cls, scoring = glmnet.LogitNet, 'accuracy'
    else:
        estimator_cls, scoring = glmnet.ElasticNet, 'mean_squared_error'

    enet = estimator_cls(
        standardize=False,
        fit_intercept=False,
        n_splits=5,
        scoring=scoring,
        alpha=1,
        random_state=random_state,
    )
    enet.fit(X, y, relative_penalties=adaptive_weights, sample_weight=sample_weight)
    return enet.coef_
def run_regression(df, dataset, X, y, reg_alpha, reg_lambda, standardize=True):
    """
    Benchmark several linear regressors on one dataset.

    Runs ``run_xgboost_regression`` once per linear updater
    (``coord_descent``, ``shotgun``, ``gpu_coord_descent``), then fits
    scikit-learn's and glmnet's ElasticNet with the equivalent penalty and
    appends one result row per library to ``df``.

    Parameters
    ----------
    df : results table; rows are appended in place via ``df.loc[len(df)]``.
    dataset : dataset label written into each row.
    X, y : training data; also used for the reported training RMSE.
    reg_alpha, reg_lambda : L1 and L2 penalty strengths.
    standardize : when true, X is passed through ``StandardScaler`` and y
        through ``preprocessing.scale`` before anything is fitted.
    """
    if standardize:
        X = preprocessing.StandardScaler().fit_transform(X)
        y = preprocessing.scale(y)

    metric = 'RMSE'
    density = get_density(X)

    def _append_row(library, score, elapsed, n_iter, n_zero):
        # One result row; the column order must match the layout of df.
        df.loc[len(df)] = [
            dataset, X.shape[1], X.shape[0], density, 'Regression',
            reg_alpha, reg_lambda, library, metric, score, elapsed,
            n_iter, n_zero
        ]

    # The same dict object is reused across runs; only 'updater' changes.
    param = {
        'booster': 'gblinear',
        'updater': 'coord_descent',
        'reg_alpha': reg_alpha,
        'reg_lambda': reg_lambda,
        'debug_verbose': 0
    }
    for updater in ('coord_descent', 'shotgun', 'gpu_coord_descent'):
        param['updater'] = updater
        run_xgboost_regression(df, X, y, param, dataset, reg_alpha,
                               reg_lambda, metric, density)

    # scikit-learn: combined strength plus the L1 share of the penalty.
    started = time.time()
    sk_model = linear_model.ElasticNet(
        alpha=reg_alpha + reg_lambda,
        l1_ratio=reg_alpha / (reg_alpha + reg_lambda))
    sk_model.fit(X, y)
    sk_elapsed = time.time() - started
    sk_rmse = np.sqrt(metrics.mean_squared_error(y, sk_model.predict(X)))
    _append_row('Sklearn', sk_rmse, sk_elapsed, sk_model.n_iter_,
                count_zero_coefficients(sk_model.coef_))

    # glmnet: same mixing value, single-element lambda path.
    started = time.time()
    glm_model = glmnet.ElasticNet(
        alpha=reg_alpha / (reg_alpha + reg_lambda),
        lambda_path=[reg_alpha + reg_lambda])
    glm_model.fit(X, y)
    glm_elapsed = time.time() - started
    glm_pred = glm_model.predict(X, lamb=[reg_alpha + reg_lambda])
    glm_rmse = np.sqrt(metrics.mean_squared_error(y, glm_pred))
    # No iteration count is recorded for glmnet, hence the '-' placeholder.
    _append_row('Glmnet', glm_rmse, glm_elapsed, '-',
                count_zero_coefficients(glm_model.coef_))
def AdaptiveLasso(X, y, standardize=False, sample_weight=None, weight_Adaptive=None, cv=5, random_state=None, fit_intercept=False):
    """
    Adaptive Lasso with cross-validation for optimal lambda.

    Fits a cross-validated ``glmnet.ElasticNet`` with ``alpha=1`` in which
    each feature's relative penalty is the reciprocal of its adaptive
    weight.

    Parameters
    ----------
    X, y : training data, forwarded unchanged to ``fit``.
    standardize, fit_intercept, random_state : forwarded to the estimator.
    sample_weight : forwarded to ``fit``.
    weight_Adaptive : adaptive weights; ``1 / weight_Adaptive`` is passed as
        ``relative_penalties``. When None, ``relative_penalties=None`` is
        passed instead, which is glmnet's own default for that argument.
        (Previously the None default raised ``TypeError`` on ``1 / None``.)
    cv : forwarded to glmnet as ``n_splits``.

    Returns
    -------
    The fitted estimator's ``coef_``.
    """
    if weight_Adaptive is None:
        relative_penalties = None
    else:
        relative_penalties = 1 / weight_Adaptive

    adalasso = glmnet.ElasticNet(
        alpha=1,
        standardize=standardize,
        fit_intercept=fit_intercept,
        n_splits=cv,
        scoring='mean_squared_error',
        random_state=random_state,
    )
    adalasso.fit(X, y, relative_penalties=relative_penalties, sample_weight=sample_weight)
    return adalasso.coef_
def ElasticNet(X, y, logistic=False, sample_weight=None, random_state=None):
    """
    Elastic Net with cross-validation for optimal alpha and lambda.

    For every mixing value in ``np.arange(0, 1.1, 0.1)`` a cross-validated
    model is fitted (5 splits, no standardization, no intercept):
    ``glmnet.LogitNet`` scored by accuracy when ``logistic`` is true,
    otherwise ``glmnet.ElasticNet`` scored by mean squared error. The model
    whose largest ``cv_mean_score_`` entry is highest is selected.

    Parameters
    ----------
    X, y : training data, forwarded unchanged to ``fit``.
    logistic : choose the logistic (True) or linear (False) estimator.
    sample_weight : forwarded to ``fit``.
    random_state : forwarded to the estimator so that the cross-validation
        is reproducible (previously this argument was silently ignored).

    Returns
    -------
    The ``coef_`` attribute of the selected model.
    """
    if logistic:
        estimator_cls, scoring = glmnet.LogitNet, 'accuracy'
    else:
        estimator_cls, scoring = glmnet.ElasticNet, 'mean_squared_error'

    fitted_models = []
    best_scores = []
    for alpha in np.arange(0, 1.1, 0.1):
        cv_enet = estimator_cls(
            standardize=False,
            fit_intercept=False,
            n_splits=5,
            scoring=scoring,
            alpha=alpha,
            random_state=random_state,
        )
        # Fit exactly once; the earlier code fitted every model twice.
        cv_enet.fit(X, y, sample_weight=sample_weight)
        best_scores.append(cv_enet.cv_mean_score_.max())
        fitted_models.append(cv_enet)

    # np.argmax keeps the first candidate on ties.
    cv_max_model = fitted_models[np.argmax(best_scores)]
    return cv_max_model.coef_
def _model(self):
    """
    Build a ``glmnet.ElasticNet`` configured from this object's attributes.

    The estimator is only constructed here, not fitted. ``n_jobs`` is fixed
    to -1 and ``verbose`` to False; every other option is read from the
    attribute of the corresponding name (``standardize`` comes from
    ``self.scale`` and ``scoring`` from ``self.metric``).
    """
    make_estimator = glmnet.ElasticNet
    options = dict(
        alpha=self.alpha,
        n_lambda=self.n_lambda,
        min_lambda_ratio=self.min_lambda_ratio,
        lambda_path=self.lambda_path,
        standardize=self.scale,
        fit_intercept=self.fit_intercept,
        cut_point=self.cut_point,
        n_splits=self.n_splits,
        scoring=self.metric,
        n_jobs=-1,
        tol=self.tol,
        max_iter=self.max_iter,
        random_state=self.random_state,
        max_features=self.max_features,
        verbose=False,
    )
    return make_estimator(**options)
"estimator,iter_num,n_selected_var,recall,precision,f1_score,exec_time\n" ) for data in dataset_names: data_dir = os.path.join('Simulation_Data', data) beta = np.load(os.path.join(data_dir, 'beta0.npy')) for iter_num in range(n_iter): print(iter_num) X = np.load(os.path.join(data_dir, f'x_tr{iter_num}.npy')) y = np.load(os.path.join(data_dir, f'y_tr{iter_num}.npy')) start_time = time.time() lasso = glmnet.ElasticNet(alpha=1, standardize=True, fit_intercept=False, n_splits=5, scoring='mean_squared_error', n_jobs=-1, random_state=random_state) lasso.fit(X, y) end_time = time.time() record.record_result(record_path, method, beta, beta_hat=enet.coef_, exec_time=end_time - start_time)
beta = np.load(os.path.join(data_dir, 'beta0.npy')) for iter_num in range(n_iter): print(iter_num) X = np.load(os.path.join(data_dir, f'x_tr{iter_num}.npy')) y = np.load(os.path.join(data_dir, f'y_tr{iter_num}.npy')) start_time = time.time() alphas = np.arange(0.1, 1.0, 0.1) mses = np.array([]) for i in alphas: cv_enet = glmnet.ElasticNet(standardize=True, fit_intercept=False, n_splits=5, scoring='mean_squared_error', alpha=i, n_jobs=-1, random_state=random_state).fit(X, y) mses = np.append(mses, cv_enet.cv_mean_score_.max()) opt_alpha = alphas[mses.argmax()] enet = glmnet.ElasticNet(standardize=True, fit_intercept=False, n_splits=5, scoring='mean_squared_error', alpha=opt_alpha, n_jobs=-1, random_state=random_state) enet.fit(X, y) end_time = time.time()