def Regularized_Smap(abund, target_otu, theta, l_grid, iteration, cv, train_len):
    """Fit a locally weighted elastic net (regularized S-map) for one OTU.

    For every row of the lagged, standardized data block the remaining rows
    are weighted by ``make_weights`` of their Euclidean distance to that
    row, an ``ElasticNetCV`` model is fitted, and its coefficients and fit
    diagnostics are stored.  Both tables are written as CSV files below
    ``../Output/test/0/``.

    Parameters
    ----------
    abund :
        Abundance table indexed as ``abund[row, column]``.
        NOTE(review): the slicing below (``abund[1:, target_otu]`` appended
        along axis 1, ``q[:, 1:]`` on a single row) only works if slices
        stay 2-D, so this is presumably an ``np.matrix`` -- confirm.
    target_otu : int
        Column index of the OTU to predict.
    theta :
        Forwarded to ``make_weights``; also part of the output file names.
    l_grid : float
        Step of the ``l1_ratio`` grid (``l_grid, 2 * l_grid, ...``).
    iteration : int
        ``max_iter`` passed to ``ElasticNetCV``.
    cv :
        ``cv`` argument passed to ``ElasticNetCV``.
    train_len : int
        Despite its name, the number of weighted rows drawn at random and
        held out as the *test* set in every local fit.
    """
    print('Process data for otu No. %s' % str(target_otu+1))
    # Make input for the elastic_net: target at t+1 next to all OTUs at t.
    block = np.append(abund[1:, target_otu], abund[0:-1, ], axis=1)
    # Delete the uncontinuous states: the rows at one third and two thirds
    # of the table.  Floor division keeps the indices integral; true
    # division yields floats, which np.delete does not accept as indices.
    n_rows = abund.shape[0]
    block = np.delete(block, [n_rows // 3 - 1, n_rows // 3 * 2 - 1], axis=0)
    # Scaling the input: every column is normalized to mean 0 and
    # standard deviation 1 before analysis with S-maps.
    block = (block - np.average(block, axis=0)) / np.std(block, axis=0)

    # Select data and fitting
    print('Start fitting.')
    lib = range(block.shape[0])
    coefs = np.empty(shape=(block.shape[0], block.shape[1] - 1))
    fit_results = np.empty(shape=(block.shape[0], 13))
    for ipred in lib:
        print('\r', 'Complete percentage: %.2f%%' % (ipred / len(lib) * 100), end="", flush=True)
        sub_block = np.delete(block, ipred, axis=0)
        q = block[lib[ipred], :]
        # Calculate weights from the distance of every other row to row ipred.
        E_dist = np.sqrt(np.sum(np.array(sub_block[:, 1:] - q[:, 1:]) ** 2, axis=1))
        w = make_weights(E_dist, theta)
        # Weighted predictors and responses
        X_wp = weight_data(sub_block[:, 1:], w)
        Y_wp = np.ravel(weight_data(sub_block[:, 0], w))
        X_target = block[ipred, 1:]
        Y_target = block[ipred, 0]
        # Split training and test data: `train_len` random rows are held
        # out; the target row itself is always appended to the training set.
        pick_test = np.random.choice(range(X_wp.shape[0]), size=train_len, replace=False)
        X_train = np.append(np.delete(X_wp, pick_test, axis=0), X_target, axis=0)
        X_test = X_wp[pick_test, :]
        Y_train = np.append(np.delete(Y_wp, pick_test, axis=0), Y_target)
        Y_test = Y_wp[pick_test]
        # Fit function
        regr = ElasticNetCV(cv=cv, random_state=0, max_iter=iteration,
                            l1_ratio=[(i + 1) * l_grid for i in range(int(1 / l_grid))])
        regr.fit(X_train, Y_train)
        rmse = np.sqrt(np.mean((regr.predict(X_train) - Y_train) ** 2))
        rmse_o = np.sqrt(np.mean((regr.predict(X_test) - Y_test) ** 2))
        coefs[ipred, :] = regr.coef_
        fit_results[ipred, :] = (
            regr.intercept_, regr.alpha_, regr.l1_ratio_, rmse, np.std(Y_train),
            rmse_o, np.std(Y_test), regr.score(X_test, Y_test),
            regr.score(X_train, Y_train), max(Y_train), min(Y_train),
            max(Y_test), min(Y_test))
        print('\r', 'Complete percentage: %.2f%%' % ((ipred + 1) / len(lib) * 100), end="", flush=True)

    # Output results
    coefs = pd.DataFrame(data=coefs)
    coefs.to_csv('../Output/test/0/coefs/%s_%s_coefs.csv' % (target_otu, theta))
    fit_results = pd.DataFrame(
        columns=['Intercept', 'Best alpha', 'Best l1_ratio', 'RMSE', 'Std', 'RMSE_o', 'Std_o',
                 'Test set score', 'Test set score_train', 'ymax_train', 'ymin_train',
                 'ymax_test', 'ymin_test'],
        data=fit_results)
    fit_results.to_csv('../Output/test/0/fit_result/%s_%s_fit_results.csv' % (target_otu, theta))
def test_enet_path():
    """Check ElasticNetCV / MultiTaskElasticNetCV model selection and scores."""

    def path_kwargs():
        # Fresh keyword set per estimator so no list object is shared.
        return dict(alphas=[0.01, 0.05, 0.1], eps=2e-3, l1_ratio=[0.5, 0.7],
                    cv=3, max_iter=max_iter)

    # A large number of samples and of informative features pushes the
    # selected l1_ratio toward ridge rather than lasso.
    X, y, X_test, y_test = build_dataset(n_samples=200, n_features=100,
                                         n_informative_features=100)
    # Deliberately small iteration budget: the ElasticNet might not
    # converge, but the test runs faster.
    max_iter = 150

    estimator = ElasticNetCV(**path_kwargs())
    ignore_warnings(estimator.fit)(X, y)
    # Well-conditioned settings: the smallest penalty should be selected.
    assert_almost_equal(estimator.alpha_, min(estimator.alphas_))
    # Non-sparse ground truth: the chosen elastic net should be the one
    # closer to ridge than to lasso.
    assert estimator.l1_ratio_ == min(estimator.l1_ratio)

    estimator = ElasticNetCV(precompute=True, **path_kwargs())
    ignore_warnings(estimator.fit)(X, y)
    # Same expectations with a precomputed Gram matrix.
    assert_almost_equal(estimator.alpha_, min(estimator.alphas_))
    assert estimator.l1_ratio_ == min(estimator.l1_ratio)
    # Well-conditioned, low-noise data: test-set performance must be good.
    assert estimator.score(X_test, y_test) > 0.99

    # Multi-output/target case
    X, y, X_test, y_test = build_dataset(n_features=10, n_targets=3)
    multi = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7],
                                  cv=3, max_iter=max_iter)
    ignore_warnings(multi.fit)(X, y)
    # Again low noise, so the held-out score must be high.
    assert multi.score(X_test, y_test) > 0.99
    assert multi.coef_.shape == (3, 10)

    # Mono-output should have same cross-validated alpha_ and l1_ratio_
    # in both cases.
    X, y, _, _ = build_dataset(n_features=10)
    single_task = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
    single_task.fit(X, y)
    multi_task = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
    multi_task.fit(X, y[:, np.newaxis])
    assert_almost_equal(single_task.l1_ratio_, multi_task.l1_ratio_)
    assert_almost_equal(single_task.alpha_, multi_task.alpha_)
def train_elasticnet_model(self, mode, ffm):
    """Pick an l1 ratio on the validation set and fit the final elastic net.

    Ten l1 ratios from 0.1 to 1.0 are tried with 10-fold ``ElasticNetCV``;
    the ratio with the lowest validation MSE is used to fit the model that
    is stored in ``self.elasticnet`` and returned.

    Parameters
    ----------
    mode : list
        Only used to build the cache key ``tuple(mode + [ffm])``; the
        features always come from ``self.X_train2`` / ``self.X_val2``.
    ffm :
        Key selecting the target in ``self.y_train`` / ``self.y_val``.

    Returns
    -------
    The fitted ``ElasticNetCV`` stored under the cache key.
    """
    # X_train = np.array(self.X_train[mode])
    X_train = np.array(self.X_train2)
    y_train = np.array(self.y_train[ffm])
    # X_val = np.array(self.X_val[mode])
    X_val = np.array(self.X_val2)
    y_val = np.array(self.y_val[ffm])

    l1ratios = np.linspace(0.1, 1, 10)
    mses = []
    alps = []
    for l1 in l1ratios:
        print(l1)
        enet = ElasticNetCV(l1_ratio=l1, cv=10)
        enet.fit(X_train, y_train)
        mses.append(mean_squared_error(y_val, enet.predict(X_val)))
        alps.append(enet.alpha_)

    i_opt = np.argmin(mses)
    l1_opt = l1ratios[i_opt]
    alpha_opt = alps[i_opt]
    print("optimal l1", l1_opt)
    print("optimal alpha", alpha_opt)

    # NOTE(review): the final model is refitted with the default `cv`, not
    # the cv=10 used during the search, so its alpha_ may differ from the
    # "optimal alpha" printed above.
    enet2 = ElasticNetCV(l1_ratio=l1_opt)
    enet2.fit(X_train, y_train)
    y_pred = enet2.predict(X_val)
    y_pred_train = enet2.predict(X_train)
    print("Training MSE", mean_squared_error(y_train, y_pred_train))
    print("Validation MSE", mean_squared_error(y_val, y_pred))
    print("Training Pearson R", pearsonr(y_train, y_pred_train))
    print("Validation Pearson R", pearsonr(y_val, y_pred))
    # Report R2 for the model that is actually returned (enet2), not for
    # the last model left over from the search loop.
    print("Training R2 score:", enet2.score(X_train, y_train))
    print("Validation R2 score:", enet2.score(X_val, y_val))
    # print(enet2.alpha_)

    key = tuple(mode + [ffm])
    self.elasticnet[key] = enet2
    return self.elasticnet[key]
def elastic_net(Xtrain, Ytrain, Xdev, Ydev, verbose=False):
    """Train an elastic net linear model and score it on the dev data.

    The model is an ``ElasticNetCV`` searching over a fixed list of l1
    ratios.  The selected hyperparameters are printed.

    Inputs:
        Xtrain, Ytrain: training features and targets.
        Xdev, Ydev: held-out features and targets used for scoring.
        verbose: when true, the fitted coefficients are printed as well.

    Returns:
        float: the R^2 on the dev data for the selected model.
        ElasticNetCV: the trained model.
    """
    from pprint import pprint

    print("\n========================\nTraining Elastic Net\n")

    # max_iter must be an integer; the float literal 1e4 is not one.
    enet = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1],
                        max_iter=10000,
                        tol=1e-2)
    enet.fit(Xtrain, Ytrain)

    best_score = enet.score(Xdev, Ydev)
    results = {
        "R2": best_score,
        "alpha": enet.alpha_,
        "l1_ratio": enet.l1_ratio_,
    }
    if verbose:
        results['coefficients'] = enet.coef_.tolist()
    # The builtin print() has no `indent` keyword; pprint does.
    pprint(results, indent=4)

    return best_score, enet
def score(inEval, X, y):
    """Return the training R^2 of an elastic net fitted on evaluated expressions.

    Each element of ``inEval`` is converted by ``updatedEvalString`` and
    passed to ``eval``; every successful result becomes one column
    (``col0``, ``col1``, ...) of the design matrix.  An ``ElasticNetCV``
    (2-fold) is fitted on that matrix against ``y`` and scored on the same
    data.

    ``X`` is not referenced directly in this body.
    NOTE(review): presumably the evaluated strings refer to ``X`` by name,
    which is why local names here must not be renamed -- confirm against
    ``updatedEvalString``.

    SECURITY: ``eval`` executes arbitrary code; the expressions must never
    come from untrusted input.
    """
    indMatrix = pd.DataFrame()
    i = 0
    listEval = list(inEval)
    for ele in listEval:
        evalString = updatedEvalString(ele)
        # Exception handling against log(0): an expression that raises
        # ZeroDivisionError is skipped and does not consume a column index.
        try:
            indMatrix[str.format('col{0}', i)] = eval(evalString)
        except ZeroDivisionError:
            continue
        i = i + 1
    # Replace +/-inf with 1 so the regression receives finite values.
    indMatrix = indMatrix.replace([np.inf, -np.inf], 1)
    # Linear regression with elastic net
    # (the string literal below is an earlier variant kept by the author;
    # as a bare expression statement it has no effect)
    """ regr = ElasticNet(random_state=0, l1_ratio = 0.1) regr.fit(indMatrix,y_train) y_p = regr.predict(indMatrix) regr.score(indMatrix,y_train)"""
    regr = ElasticNetCV(cv=2, random_state=0, max_iter=5000)
    regr.fit(indMatrix, y)
    # Score is computed on the data the model was fitted on.
    return (regr.score(indMatrix, y))
def elastic_net_cv(problem, **kwargs):
    r"""Select the data series that receive a non-negligible elastic-net weight.

    An ``ElasticNetCV`` is fitted with one feature per entry of
    ``problem.data`` against ``problem.goal``; entries whose coefficient
    magnitude exceeds the tolerance are kept.

    Parameters
    ----------
    kwargs['elastic_net_reg_coefs'] must be a list of nonnegative float.
        These are the multipliers for the penalty term tried in
        cross-validation (passed as ``alphas``).
    kwargs['elastic_net_ratio'] must be between 0 and 1
        (passed as ``l1_ratio``).
    kwargs['coef_tolerance'] must be a nonnegative float

    Returns
    -------
    output : tuple (optimum, maximum)
        ``optimum`` is the list of selected ``problem.data`` entries,
        ``maximum`` the model's score on the fitting data.
    """
    # One row per datum; transposed so every datum becomes a feature column.
    features = numpy.array([datum['data']['values'] for datum in problem.data]).T

    model = ElasticNetCV(alphas=kwargs['elastic_net_reg_coefs'],
                         l1_ratio=kwargs['elastic_net_ratio'])
    model.fit(features, problem.goal['data']['values'])

    selected = []
    for position, weight in enumerate(model.coef_):
        if abs(weight) > kwargs['coef_tolerance']:
            selected.append(problem.data[position])

    fit_score = model.score(features, problem.goal['data']['values'])
    return (selected, fit_score)
def elastic_net(A, y, positive=True):
    """Fit a cross-validated elastic net on standardized data.

    The first column of ``A`` is dropped before fitting
    (NOTE(review): presumably a constant/intercept column -- confirm).
    The remaining columns and ``y`` are standardized, the model is fitted
    without intercept, and the coefficients are mapped back to the
    original scale.

    Parameters
    ----------
    A : 2-D array
        Design matrix; columns ``1:`` are used as features.
    y : 1-D array
        Response vector.
    positive : bool
        Forwarded to ``ElasticNetCV(positive=...)``.

    Returns
    -------
    tuple
        ``(np.append(intercept, coef), df)`` where ``df`` is the number of
        nonzero coefficients.
    """
    A_scaler = StandardScaler().fit(A[:, 1:])
    y_scaler = StandardScaler().fit(y.reshape(-1, 1))
    A_new = A_scaler.transform(A[:, 1:])
    y_new = y_scaler.transform(y.reshape(-1, 1)).reshape(-1)

    # `normalize=False` used to be passed here; it only restated the old
    # default and the keyword no longer exists in current scikit-learn,
    # so it is omitted.
    clf = ElasticNetCV(l1_ratio=[0.1, 0.5, 1.0], cv=5, n_jobs=8,
                       fit_intercept=False, positive=positive).fit(A_new, y_new)
    score = clf.score(A_new, y_new)

    # Approximate assuming the elastic net is very close to the lasso
    df = np.count_nonzero(clf.coef_)
    logging.info(
        "[ElasticNet] # iter: %d, alpha: %e, l1_ratio: %.2f, # of terms: %d, score: %f",
        clf.n_iter_, clf.alpha_, clf.l1_ratio_, df, score)
    logging.debug("[ElasticNet] alphas:")
    logging.debug(str(clf.alphas_))
    logging.debug("[ElasticNet] MSE path:")
    logging.debug(str(clf.mse_path_))

    # Undo the standardization: rescale the nonzero coefficients and
    # rebuild the intercept from the scaler means.
    nonzero = abs(clf.coef_) > 0.0
    coef = np.zeros_like(clf.coef_)
    coef[nonzero] = (y_scaler.scale_ / A_scaler.scale_[nonzero]) * clf.coef_[nonzero]
    intercept = y_scaler.mean_ - np.dot(A_scaler.mean_, coef)
    return np.append(intercept, coef), df
def Elastic_net_fitting(block, target_otu, interest_otu, theta, train_len, cv, iteration, l_grid, output_dir):
    """Run the locally weighted elastic-net fit for every row of `block`.

    Column 0 of `block` is the response, the remaining columns are the
    predictors.  For each row the other rows are weighted through
    `make_weights` / `weight_data`, `train_len` weighted rows are drawn at
    random and held out as the test set, and an `ElasticNetCV` is fitted
    on the rest plus the (unweighted) row itself.  Coefficients and 13
    diagnostics per row are written as CSV files below `output_dir`.
    """
    ##Select data and fitting
    print('Start fitting.')
    n_rows = block.shape[0]
    n_predictors = block.shape[1] - 1
    row_ids = range(n_rows)
    # The l1_ratio grid does not change between rows.
    l1_grid = [(step + 1) * l_grid for step in range(int(1 / l_grid))]

    coef_table = np.empty(shape=(n_rows, n_predictors))
    stats_table = np.empty(shape=(n_rows, 13))

    for row in row_ids:
        print('\r', 'Complete percentage: %.2f%%' % (row / len(row_ids) * 100), end="", flush=True)
        others = np.delete(block, row, axis=0)
        query = block[row_ids[row], :]

        ###Calculate weights
        squared_gap = np.array(others[:, 1:] - query[:, 1:]) ** 2
        distance = np.sqrt(np.sum(squared_gap, axis=1))
        weights = make_weights(distance, theta)

        ###Weighted predictors and responses
        weighted_X = weight_data(others[:, 1:], weights)
        weighted_Y = np.ravel(weight_data(others[:, 0], weights))

        ##Split training and test data
        held_out = np.random.choice(range(weighted_X.shape[0]), size=train_len, replace=False)
        X_fit = np.append(np.delete(weighted_X, held_out, axis=0), block[row, 1:], axis=0)
        Y_fit = np.append(np.delete(weighted_Y, held_out, axis=0), block[row, 0])
        X_eval = weighted_X[held_out, :]
        Y_eval = weighted_Y[held_out]

        ###Fit function
        model = ElasticNetCV(cv=cv, random_state=0, max_iter=iteration, l1_ratio=l1_grid)
        model.fit(X_fit, Y_fit)
        fit_rmse = np.sqrt(np.mean((model.predict(X_fit) - Y_fit) ** 2))
        eval_rmse = np.sqrt(np.mean((model.predict(X_eval) - Y_eval) ** 2))

        coef_table[row, :] = model.coef_
        stats_table[row, :] = [
            model.intercept_,
            model.alpha_,
            model.l1_ratio_,
            fit_rmse,
            np.std(Y_fit),
            eval_rmse,
            np.std(Y_eval),
            model.score(X_eval, Y_eval),
            model.score(X_fit, Y_fit),
            max(Y_fit),
            min(Y_fit),
            max(Y_eval),
            min(Y_eval),
        ]
        print('\r', 'Complete percentage: %.2f%%' % ((row + 1) / len(row_ids) * 100), end="", flush=True)

    # Output results
    stat_columns = ['Intercept', 'Best alpha', 'Best l1_ratio', 'RMSE', 'Std', 'RMSE_o', 'Std_o',
                    'Test set score', 'Test set score_train', 'ymax_train', 'ymin_train',
                    'ymax_test', 'ymin_test']
    name_parts = (interest_otu, target_otu, theta)

    coefs = pd.DataFrame(data=coef_table)
    coefs.to_csv('/'.join([output_dir, 'coefs/%s_%s_%s_fit_results.csv' % name_parts]))
    fit_results = pd.DataFrame(columns=stat_columns, data=stats_table)
    fit_results.to_csv('/'.join([output_dir, 'fit_result/%s_%s_%s_fit_results.csv' % name_parts]))
def enetCV():
    """Fit ElasticNetCV on the module-level base data and write test predictions.

    Reads the module-level names `base_X`, `base_Y` and `X_test`, prints
    the score on the fitting data and hands the predictions for `X_test`
    to `write_to_file` under the name "elasticCV.csv".
    """
    print ("Doing elastic net")
    # Five random 80/20 splits drive the internal cross-validation.
    splitter = cross_validation.ShuffleSplit(len(base_X), n_iter=5, test_size=0.2, random_state=0)
    model = ElasticNetCV(cv=splitter)
    model.fit(base_X, base_Y)
    fit_score = model.score(base_X, base_Y)
    print ("Score = %f" % fit_score)
    predictions = model.predict(X_test)
    write_to_file("elasticCV.csv", predictions)
def elasticnet_reg(x, y):
    """Fit a 20-fold ElasticNetCV on (x, y), report it, and return its coefficients.

    Prints the score on the fitting data and the selected alpha.
    """
    model = ElasticNetCV(cv=20)
    model.fit(x, y)
    r_squared, chosen_alpha = model.score(x, y), model.alpha_
    print('ElasticNet R square', r_squared)
    print('ElasticNet Alpha', chosen_alpha)
    return model.coef_
def score(inEval, X, y):
    """Return the training score of an elastic net fitted on the evaluated matrix.

    ``inEval`` is materialized as a list and turned into a design matrix
    by ``evaluatedMatrix(listEval, X)``.  An ``ElasticNetCV`` is fitted on
    that matrix against ``y`` and scored on the same data.
    """
    # The empty DataFrame that used to be created here was overwritten
    # immediately, so it has been dropped.
    listEval = list(inEval)
    indMatrix = evaluatedMatrix(listEval, X)
    # Linear regression with elastic net
    regr = ElasticNetCV(random_state=0)
    regr.fit(indMatrix, y)
    return regr.score(indMatrix, y)
def score(inEval, X, y):
    """Return the training score of an elastic net fitted on the evaluated matrix.

    ``inEval`` is materialized as a list and turned into a design matrix
    by ``evaluatedMatrix(listEval, X)``.  An ``ElasticNetCV`` is fitted on
    that matrix against ``y`` and scored on the same data.

    Returns
    -------
    The model score, or ``None`` when fitting or scoring raises
    ``ValueError`` (the offending matrix is printed in that case).
    """
    # The empty DataFrame that used to be created here was overwritten
    # immediately, so it has been dropped.
    listEval = list(inEval)
    indMatrix = evaluatedMatrix(listEval, X)
    try:
        # Linear regression with elastic net
        regr = ElasticNetCV(random_state=0)
        regr.fit(indMatrix, y)
        return regr.score(indMatrix, y)
    except ValueError:
        # Best-effort diagnostics: show the matrix that could not be
        # fitted and signal the failure explicitly to the caller.
        print(indMatrix)
        return None
def _train_enet(y, Z, X, include_ses=False, p_threshold=0.01):
    """Train an elastic net on `Z` after regressing the covariates `X` out of `y`.

    A heritability fit (`_fit_cis_herit`) on the normalised `Z Z^T` matrix
    runs first; if its p-value exceeds `p_threshold` nothing is trained
    and `None` is returned.  Otherwise returns `(betas, ses, attrs)` where
    `ses` is always `None` (standard errors are not implemented,
    whatever `include_ses` is) and `attrs` holds the heritability and fit
    statistics.
    """
    log = logging.getLogger(pyfocus.LOG)

    try:
        from limix.qc import normalise_covariance
        from sklearn.linear_model import ElasticNetCV
    except ImportError as ie:
        log.error(
            "Training submodule requires limix>=2.0.0 and sklearn to be installed."
        )
        raise
    from scipy.linalg import lstsq

    log.debug("Initializing ElasticNet model")

    n_samples = len(y)
    kinship = normalise_covariance(np.dot(Z, Z.T))
    fe_var, s2u, s2e, logl, _, pval = _fit_cis_herit(y, kinship, X)
    if pval > p_threshold:
        log.info("h2g pvalue {} greater than threshold {}. Skipping".format(
            pval, p_threshold))
        return None

    attrs = {
        "h2g": s2u / (s2u + s2e + fe_var),
        "h2g.logl": logl,
        "h2g.pvalue": pval,
    }

    # Only the effects in Z are to be penalized, not the covariate
    # effects: remove the least-squares covariate fit from y first.
    covariate_betas = lstsq(X, y)[0]
    residual = y - np.dot(X, covariate_betas)

    model = ElasticNetCV(l1_ratio=0.5, fit_intercept=True, cv=5)
    model.fit(Z, residual)
    betas = model.coef_

    attrs["r2"] = model.score(Z, residual)
    attrs["resid.var"] = sum((residual - model.predict(Z))**2) / (n_samples - 1)

    # TODO: bootstrap?  Standard errors are not available in either case.
    ses = None

    return betas, ses, attrs
def regress(x, y, title):
    """Fit a 10-fold ElasticNetCV on (x, y) and plot predictions against targets.

    Prints the score on the fitting data and shows a scatter plot with the
    prediction on the x axis and the target on the y axis.  When ``title``
    contains "Boston" a perfect-fit reference line is added.
    """
    clf = ElasticNetCV(max_iter=200, cv=10, l1_ratio=[.1, .5, .7, .9, .95, .99, 1])
    clf.fit(x, y)
    # print() call form: the statement form is a syntax error on Python 3.
    print("Score", clf.score(x, y))

    pred = clf.predict(x)
    plt.title("Scatter plot of prediction and " + title)
    plt.xlabel("Prediction")
    plt.ylabel("Target")
    # Argument order follows the axis labels: predictions on x, targets on y.
    plt.scatter(pred, y)
    # Show perfect fit line
    if "Boston" in title:
        plt.plot(y, y, label="Perfect Fit")
        plt.legend()
    plt.grid(True)
    plt.show()
def fit_elasticnet(data, targets, permute=True):
    """Elasticnet regression on standardized features.

    Parameters
    ----------
    data : object exposing ``.values`` (e.g. a DataFrame)
        Feature table; standardized with ``StandardScaler`` before use.
    targets :
        Regression targets.
    permute :
        When truthy, a permutation test (1000 permutations, 3 jobs) is
        run and its p-value is returned; otherwise ``-1`` is returned in
        its place.

    Returns
    -------
    tuple
        ``(params, score, p_value)`` with ``params`` holding the selected
        ``alpha`` and ``l1_ratio`` and ``score`` computed on the fitting
        data.
    """
    # Standardize once and reuse the same matrix for fitting, scoring and
    # the permutation test, so all three describe the same problem.
    scaled = StandardScaler().fit_transform(data.values)

    cv = ElasticNetCV()
    cv.fit(scaled, targets)
    params = {"alpha": cv.alpha_, "l1_ratio": cv.l1_ratio_}
    score = cv.score(scaled, targets)

    if not permute:
        return params, score, -1

    p = permutation_test_score(
        cv,
        scaled,
        targets,
        # cv=10,
        n_jobs=3,
        n_permutations=1000,
    )
    # The last element of the result is the p-value.
    return params, score, p[-1]
def elastic_net(df):
    """Train and report an elastic net predicting 'DaysFromFirstDate'.

    The frame is one-hot encoded, passed through the helpers
    ``remove_high_correlation`` and ``select_k_best`` (with k = 40), split
    80/20, standardized and fitted with a 20-fold ``ElasticNetCV``.  Test
    MSE, test score and 20-fold cross-validation scores on the training
    data are printed; nothing is returned.
    """
    k_val = 40
    df = pd.get_dummies(df)
    df = remove_high_correlation(df)
    df = select_k_best(df, k_val)

    X = df.drop('DaysFromFirstDate', axis=1)
    y = df['DaysFromFirstDate']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    # Reuse the training statistics for the test set; re-fitting the
    # scaler on the test data would put both sets on different scales.
    X_test = sc.transform(X_test)

    en = ElasticNetCV(cv=20, tol=.01)
    en.fit(X_train, y_train)
    pred = en.predict(X_test)

    print('Elastic Net Regression: ' + str(k_val))
    print(mean_squared_error(y_test, pred))
    print(en.score(X_test, y_test))
    scores = cross_val_score(en, X_train, y_train, cv=20)
    print(scores)
    print(scores.mean())
def train_elasticNetCV(self, data):
    """Train an ElasticNetCV, report validation MAE, pickle it and predict.

    Parameters
    ----------
    data : tuple
        ``((x_tr, y_tr), (x_val, y_val))`` -- training and validation sets.

    Returns
    -------
    Predictions of the fitted model for ``self.x_test``.

    Side effects: prints progress, creates the ``pickles`` directory if
    needed and writes ``pickles/enetCV.pkl``.
    """
    train, validacion = data
    x_tr, y_tr = train
    x_val, y_val = validacion

    print('Start training ElasticNetCV...')
    start_time = self.timer()

    enet = ElasticNetCV(normalize=True, n_alphas=2000, max_iter=2000, cv=10)
    enet.fit(x_tr, y_tr)
    print("The R2 is: {}".format(enet.score(x_tr, y_tr)))
    print("The alpha choose by CV is:{}".format(enet.alpha_))
    self.timer(start_time)

    print("Making prediction on validation data")
    # Targets and predictions are mapped through expm1 before the error
    # is computed (NOTE(review): presumably the model is trained on
    # log1p-transformed targets -- confirm).
    y_val = np.expm1(y_val)
    y_val_pred = np.expm1(enet.predict(x_val))
    mae = mean_absolute_error(y_val, y_val_pred)
    print("El mean absolute error de es {}".format(mae))

    print('Saving model into a pickle')
    try:
        os.mkdir('pickles')
    except OSError:
        # Usually "already exists"; any real problem with the directory
        # surfaces when the file is opened below.
        pass

    with open('pickles/enetCV.pkl', 'wb') as f:
        pickle.dump(enet, f)

    print('Making prediction and saving into a csv')
    y_test = enet.predict(self.x_test)

    return y_test
def elasticNet(self, X, y, cv=5):
    """Evaluate ElasticNetCV over the given folds.

    Parameters
    ----------
    X, y : arrays indexable by integer index arrays.
    cv : int or iterable of ``(train_idx, test_idx)`` pairs
        An integer ``k >= 2`` splits the samples into ``k`` contiguous,
        unshuffled folds; an iterable is used as given.

    Returns
    -------
    tuple
        ``(model, scores, rmse)``: the estimator as fitted on the *last*
        fold, the per-fold scores, and the root of the summed squared
        test errors divided by ``len(y)``.
    """
    emcv = ElasticNetCV(fit_intercept=True)

    if isinstance(cv, (int, np.integer)):
        # An integer cannot be iterated as folds: build them here.
        if cv < 2:
            raise ValueError("cv must be at least 2 when given as an integer")
        chunks = np.array_split(np.arange(len(y)), cv)
        folds = [
            (np.concatenate(chunks[:i] + chunks[i + 1:]), chunks[i])
            for i in range(len(chunks))
        ]
    else:
        folds = cv

    err = 0.0
    scores = []
    for train_idx, test_idx in folds:
        emcv.fit(X[train_idx], y[train_idx])
        scores.append(emcv.score(X[test_idx], y[test_idx]))
        diff = emcv.predict(X[test_idx]) - y[test_idx]
        err += np.dot(diff, diff)

    rmse = np.sqrt(err / len(y))
    return emcv, scores, rmse
def elastic_net_reg():
    """Fit ElasticNetCV on the module-level scaled data and collect metrics.

    Reads `X_train_scaled`, `X_test_scaled`, `y_train` and `y_test` from
    module scope, prints the selected hyperparameters and the test score,
    stores accuracy (on rounded predictions), MSE and R2 of the test
    predictions in the global `metrics_en`, and returns the result of
    `scores_results`.
    """
    global metrics_en
    from sklearn.linear_model import ElasticNetCV

    model = ElasticNetCV(n_alphas=300,
                         l1_ratio=[.1, .3, .5, .7, .9],
                         cv=10,
                         random_state=0)
    model.fit(X_train_scaled, y_train)

    train_pred = model.predict(X_train_scaled)
    test_pred = model.predict(X_test_scaled)

    print(model.alpha_, model.l1_ratio_)
    print(model.score(X_test_scaled, y_test))

    accuracy = accuracy_score(y_test, np.round(test_pred))
    mse = mean_squared_error(y_test, test_pred)
    r2 = r2_score(y_test, test_pred)
    metrics_en = [accuracy, mse, r2]

    return scores_results(y_train, y_test, train_pred, test_pred)
def ElasticNet_CV(X, Y, K_Fold, factor_string):
    """Average the CV-selected alpha over K folds and fit a final ElasticNet.

    Missing values in `X` and `Y` are replaced by 0.  An `ElasticNetCV` is
    fitted on the training part of each of the `K_Fold` folds; the mean of
    the selected alphas parameterizes a plain `ElasticNet` fitted on all
    data.  Per-fold results, coefficients and the factors whose
    coefficient magnitude is at least 1e-5 are printed.

    Returns `(intercept, coefficients, mean_alpha)` of the final model.
    """
    features = np.array(X.fillna(0))
    target = np.ravel(np.array(Y.fillna(0)))

    searcher = ElasticNetCV(fit_intercept=True, random_state=0)
    splitter = KFold(K_Fold)

    # Print alphas and scores
    print(
        "Alpha parameters maximising the generalization score on different subsets of the data:"
    )
    fold_alphas = []
    for fold_no, (train, test) in enumerate(splitter.split(features, target), start=1):
        searcher.fit(features[train], target[train])
        print("[fold {0}] alpha: {1:.5f}, score: {2:.5f}".format(
            fold_no, searcher.alpha_, searcher.score(features[test], target[test])))
        fold_alphas.append(searcher.alpha_)

    # Averaged alpha
    mean_alpha = np.mean(fold_alphas)
    final_model = ElasticNet(alpha=mean_alpha, fit_intercept=True, random_state=0)
    result = final_model.fit(features, target)

    print("Mean alpha: %f" % mean_alpha)
    print("\nIntercept: %f" % result.intercept_)

    kept = []
    for position, label in enumerate(factor_string):
        weight = result.coef_[position]
        print("Coeffcients of %s : %f" % (label, weight))
        if abs(weight - 0) >= 0.00001:
            kept.append(label)

    print("Score: ", result.score(features, target))
    print("Remaining factors: %s" % kept)
    return result.intercept_, result.coef_, mean_alpha
 # Script fragment (Python 2 print statements, legacy KFold API).
# `lr`, `X`, `y`, `KFold`, `mean_squared_error`, `r2_score`, `np` and `plt`
# are defined earlier in the script, outside this excerpt.
predicted=lr.predict(X)
'''validation'''
# 5-fold cross-validation: out-of-fold predictions are collected in `p`.
kf=KFold(len(X),n_folds=5)
p=np.zeros_like(y)
for train,test in kf:
    lr.fit(X[train],y[train])
    p[test]=lr.predict(X[test])
rmse_cv=np.sqrt(mean_squared_error(p,y))
print "RMSE of 5-fold cv {:.2}".format(rmse_cv)
'''ElasticNet'''
from sklearn.linear_model import ElasticNetCV
met=ElasticNetCV(n_jobs=-1)
# Same out-of-fold procedure with the elastic net; `p` is reused.
p=np.zeros_like(y)
for t,tst in kf:
    met.fit(X[t],y[t])
    p[tst]=met.predict(X[tst])
p2=r2_score(y,p)
# `met` is at this point the model fitted on the last fold's training part.
print met.score(X,y)
print p2,"Elastic"
# NOTE(review): exit() stops the script here, so the plotting code below
# never runs -- confirm whether that is intended.
exit()
plt.scatter(predicted,y)
plt.xlabel("Predicted")
plt.ylabel("Actual ")
plt.plot([y.min(),y.max()],[[y.min()],[y.max()]])
plt.show()
#Replace all 0 with a minimum value close to zero to resolve log(0) issue x_scaled = replaceZeroes(x_scaled) test = pd.DataFrame(x_scaled) # Renaming the dataset columns # test.columns = ['X1','X2','X3','X4','X5','y'] XColsSize = test.shape[1] - 1 XColsName = ['X{}'.format(x + 1) for x in range(0, XColsSize)] FFXColsName = np.copy(XColsName) XColsName.append('y') XColsName test.columns = XColsName X = test.iloc[:, :-1] y = test.iloc[:, -1] # create training and testing datasets X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) print(X_train.shape, y_train.shape) print(X_test.shape, y_test.shape) # Elastic net without GA regr = ElasticNetCV(random_state=0, cv=5) regr.fit(X_train, y_train) regr.score(X_train, y_train) regr.score(X_test, y_test)
 # Script fragment: reports a fitted model's hyperparameters, coefficients
# and scores through `cdsw.track_metric` and persists the model.
# `model`, `feature_cols`, the train/test data, `cdsw` and `joblib` are
# defined outside this excerpt.  The bare expressions (e.g.
# `model.l1_ratio_`) have no effect when run as a plain script
# (NOTE(review): presumably kept for interactive display -- confirm).
model.l1_ratio_
cdsw.track_metric("l1_ratio", model.l1_ratio_)
model.alpha_
cdsw.track_metric("alpha", model.alpha_)
# ## Model coefficients
model.intercept_
cdsw.track_metric("intercept", model.intercept_)
zip(feature_cols, model.coef_)
# One tracked metric per feature, named after the feature column.
for i in range(0, len(feature_cols)):
    cdsw.track_metric(feature_cols[i], model.coef_[i])
# ## r squared scores
r_train = model.score(train_features, train_labels)
r_train
cdsw.track_metric("r_train", r_train)
r_test = model.score(test_features, test_labels)
r_test
cdsw.track_metric("r_test", r_test)
# ## Persist model during experiment
filename = 'bikeshare_model.pkl'
joblib.dump(model, filename)
cdsw.track_file(filename)
#timestamp = datetime.now().strftime('%Y%m%d%H%M%S%f')
#joblib.dump(model, 'bikeshare_model_' + timestamp + '.pkl')
y = dataset.iloc[:, -1] # create training and testing datasets X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) print(X_train.shape, y_train.shape) print(X_test.shape, y_test.shape) # Elastic net without GA regr = ElasticNetCV(random_state=0, cv=5) regr.fit(X_train, y_train) print("Elastic Net:") # R2 score on train data print("Train:", regr.score(X_train, y_train)) #print(regr.score(X_test, y_test)) # Sort the coeficients def sortCoef(columns, coef): nlist = [(y, x) for x, y in zip(columns, coef)] try: nlist = sorted(nlist, key=itemgetter(0), reverse=True) except ValueError: print("Error nlist:", nlist) return [val for (key, val) in nlist], [key for (key, val) in nlist] # print a number to 3 significant digits
 # Script fragment (Python 2 print statements).  `plt`, `X`, `y`,
# `binary_y`, `lasso_prediction`, `cross_validation` and the
# `coef_path_*_cv` estimators are defined outside this excerpt.
# 2-D histogram of lasso predictions against the true values.
plt.figure()
plt.hist2d(y, lasso_prediction)
plt.ylabel("Predicted Values")
plt.xlabel("Truth Values")
plt.title("Lasso Linear Regression")
plt.savefig("figures/lasso_predicted_truth.png")
print "#######ELASTIC#####"
coef_path_elastic_cv.fit(X,y)
# NOTE(review): `get_params` is printed without being called, so this
# shows the bound method rather than the parameters.
print coef_path_elastic_cv.get_params
print "alphas:"
print coef_path_elastic_cv.alphas_
print "coef_:"
print coef_path_elastic_cv.coef_
print "length of elastic terms:%d" % len(coef_path_elastic_cv.coef_)
# Predictions and score are computed on the fitting data itself.
elastic_predict = coef_path_elastic_cv.predict(X)
elastic_score = coef_path_elastic_cv.score(X,y)
print "elastic_score:%.3g" % elastic_score
# 5-fold cross-validated scores of the same estimator, on 2 jobs.
elastic_cv_score = cross_validation.cross_val_score(coef_path_elastic_cv, X, y, n_jobs=2, cv=5)
print elastic_cv_score
#print "elastic precision:%.3g" % precision_score(y, elastic_predict, average='macro')
plt.figure()
plt.hist2d(y, elastic_predict)
plt.ylabel("Predicted Values")
plt.xlabel("Truth Values")
plt.title("Elastic Linear Regression")
plt.savefig("figures/elastic_predicted_truth.png")
print "#######Logistic#####"
coef_path_logistic_cv.fit(X,binary_y)
# NOTE(review): same as above -- `get_params` is not called.
print coef_path_logistic_cv.get_params
print "coef_:"
print coef_path_logistic_cv.coef_
def LASSO_inverse_solve(data, waves):
    """Function to compute the inverse solution with LASSO with positive coefficients

    For every sliding-window shift and every propagation speed, the
    flattened data window is regressed on the flattened basis waves of all
    directions.  The best shift is chosen per speed, then the speed with
    the highest R-squared is returned.

    Parameters
    ----------
    data : numpy.ndarray
        Input data for the inverse problem (N_channels x T)
    waves : numpy.ndarray
        Basis waves to fit
        (directions_number x number_of_speeds x channels_number x timepoints_number)

    Returns
    -------
    list
        ``[best_score, best_coefs, best_shift, best_speed_ind]``:
        R-squared in the optimum, coefficients in the optimum, starting
        time point in the optimum, index of the best speed.
    """
    import numpy as np
    from sklearn.linear_model import ElasticNetCV

    Ndir = waves.shape[0]  # number of propagation directions
    S = waves.shape[1]  # number of propagation speeds
    Tw = waves.shape[3]  # window length in time points
    R = data.shape[1] - Tw + 1  # number of sliding window shifts

    # l1_ratio=1 makes the elastic net a pure lasso.
    regression = ElasticNetCV(l1_ratio=1, positive=True, cv=5, max_iter=100000)
    coefs = np.zeros([R, S, Ndir])  # regression coefficients
    score = np.zeros([R, S])  # R-squared scores
    nzdir = np.zeros([R, S])  # number of nonzero directions
    # One prediction per element of a data window (channels x Tw).  Sizing
    # this by the full recording length cannot hold the window-sized
    # prediction vectors once there is more than one shift.
    y_pred = np.zeros([R, S, data.shape[0] * Tw])  # predicted windows

    for r in range(R):
        data_vec = data[:, r:(Tw + r)].flatten()
        for s in range(S):
            # One row per direction, each row a flattened (channels x Tw) wave.
            wavesspeed_vec = waves[:, s, :, :].reshape(Ndir, -1).astype(float)
            design = wavesspeed_vec.T
            regression.fit(design, data_vec)
            coefs[r, s, :] = regression.coef_
            score[r, s] = regression.score(design, data_vec)
            y_pred[r, s, :] = regression.predict(design)
            nzdir[r, s] = np.sum(coefs[r, s, :] != 0)

    shifts_s = score.argmax(axis=0)  # best shifts for each speed
    score_s = score.max(axis=0)  # corresponding scores

    # The speed is chosen by score alone; `nzdir` and `y_pred` are kept
    # for inspection only (an earlier variant ranked the top speeds by
    # their number of nonzero directions).
    score_sort_ind = (-score_s).argsort()
    best_speed_ind = score_sort_ind[0]
    best_coefs = coefs[shifts_s[best_speed_ind], best_speed_ind]
    best_score = score_s[best_speed_ind]  # R-squared value in optimum
    best_shift = shifts_s[best_speed_ind]

    return [best_score, best_coefs, best_shift, best_speed_ind]
def Regularized_Smap(abund, target_otu, theta, l_grid, iteration, cv, test_len, uncontinuous):
    """Fit a locally weighted elastic net (regularized S-map) for one OTU.

    For every row of the lagged, standardized data block the remaining rows
    are weighted by ``make_weights`` of their Euclidean distance to that
    row, an ``ElasticNetCV`` model is fitted, and its coefficients and fit
    diagnostics are stored.  Both tables are written as CSV files below
    ``output_dir``.

    NOTE(review): ``output_dir`` is not a parameter of this function; it
    is read from an enclosing/module scope -- confirm it is defined there.

    Parameters
    ----------
    abund :
        Abundance table indexed as ``abund[row, column]``.
        NOTE(review): the slicing below (``abund[1:, target_otu]`` appended
        along axis 1, ``q[:, 1:]`` on a single row) only works if slices
        stay 2-D, so this is presumably an ``np.matrix`` -- confirm.
    target_otu : int
        Column index of the OTU to predict.
    theta :
        Forwarded to ``make_weights``; also part of the output file names.
    l_grid : float
        Step of the ``l1_ratio`` grid (``l_grid, 2 * l_grid, ...``).
    iteration : int
        ``max_iter`` passed to ``ElasticNetCV``.
    cv :
        ``cv`` argument passed to ``ElasticNetCV``.
    test_len : int
        Number of weighted rows drawn at random and held out as the test
        set in every local fit.
    uncontinuous :
        When truthy, the rows at one third and two thirds of the table are
        removed from the block.
    """
    print('Process data for otu No. %s' % str(target_otu + 1))
    # Make input for the elastic_net: target at t+1 next to all OTUs at t.
    block = np.append(abund[1:, target_otu], abund[0:-1, ], axis=1)
    # Delete the uncontinuous states.
    # Commonly the Jacobian matrices are inferred from one continuous time
    # series.  If there are not enough time points but replicate series
    # from independent reactors exist, the replicate OTU tables can be
    # combined as input, and the rows that bridge two replicates must be
    # removed from the block.
    if uncontinuous:
        # Triplicate time series are used as the example, so two bridging
        # rows are removed.  Floor division keeps the indices integral;
        # true division yields floats, which np.delete does not accept.
        n_rows = abund.shape[0]
        block = np.delete(block, [n_rows // 3 - 1, n_rows // 3 * 2 - 1], axis=0)
        # To remove a custom set of rows instead, replace the np.delete
        # call above with e.g.
        #     block = np.delete(block, [34, 55], axis=0)

    # Scaling the input: every column is normalized to mean 0 and
    # standard deviation 1 before analysis with S-maps.
    block = (block - np.average(block, axis=0)) / np.std(block, axis=0)

    # Select data and fitting
    print('Start fitting.')
    lib = range(block.shape[0])
    coefs = np.empty(shape=(block.shape[0], block.shape[1] - 1))
    fit_results = np.empty(shape=(block.shape[0], 13))

    for ipred in lib:
        print('\r', 'Complete percentage: %.2f%%' % (ipred / len(lib) * 100), end="", flush=True)
        sub_block = np.delete(block, ipred, axis=0)
        q = block[lib[ipred], :]
        # Calculate weights from the distance of every other row to row ipred.
        E_dist = np.sqrt(
            np.sum(np.array(sub_block[:, 1:] - q[:, 1:])**2, axis=1))
        w = make_weights(E_dist, theta)
        # Weighted predictors and responses
        X_wp = weight_data(sub_block[:, 1:], w)
        Y_wp = np.ravel(weight_data(sub_block[:, 0], w))
        X_target = block[ipred, 1:]
        Y_target = block[ipred, 0]

        # Split training and test data: `test_len` random rows are held
        # out; the target row itself is always appended to the training set.
        pick_test = np.random.choice(range(X_wp.shape[0]), size=test_len, replace=False)
        X_train = np.append(np.delete(X_wp, pick_test, axis=0), X_target, axis=0)
        X_test = X_wp[pick_test, :]
        Y_train = np.append(np.delete(Y_wp, pick_test, axis=0), Y_target)
        Y_test = Y_wp[pick_test]

        # Fit function
        regr = ElasticNetCV(cv=cv, random_state=0, max_iter=iteration,
                            l1_ratio=[(i + 1) * l_grid for i in range(int(1 / l_grid))])
        regr.fit(X_train, Y_train)
        rmse = np.sqrt(np.mean((regr.predict(X_train) - Y_train)**2))
        rmse_o = np.sqrt(np.mean((regr.predict(X_test) - Y_test)**2))
        coefs[ipred, :] = regr.coef_
        fit_results[ipred, :] = (
            regr.intercept_, regr.alpha_, regr.l1_ratio_, rmse, np.std(Y_train),
            rmse_o, np.std(Y_test), regr.score(X_test, Y_test),
            regr.score(X_train, Y_train), max(Y_train), min(Y_train),
            max(Y_test), min(Y_test))
        print('\r', 'Complete percentage: %.2f%%' % ((ipred + 1) / len(lib) * 100), end="", flush=True)

    # Output results
    coefs = pd.DataFrame(data=coefs)
    coefs.to_csv('/'.join(
        [output_dir, 'coefs/%s_%s_coefs.csv' % (target_otu, theta)]))
    fit_results = pd.DataFrame(columns=[
        'Intercept', 'Best alpha', 'Best l1_ratio', 'RMSE', 'Std', 'RMSE_o',
        'Std_o', 'Test set score', 'Test set score_train', 'ymax_train',
        'ymin_train', 'ymax_test', 'ymin_test'
    ], data=fit_results)
    fit_results.to_csv('/'.join(
        [output_dir, 'fit_result/%s_%s_fit_results.csv' % (target_otu, theta)]))
def elastnet_reg(X_train,
                 X_test,
                 y_train,
                 y_test,
                 poly_name=False,
                 features=None,
                 cv=10):
    """Fit an elastic net regression model and collect its evaluation metrics.

    An ``ElasticNetCV`` estimator (100 alphas, 20 ``l1_ratio`` values evenly
    spaced over [0.01, 1], 3 internal folds, ``random_state=1``) is first
    scored with ``cross_val_score`` on the training data and then fitted on
    the full training set.

    Args:
        X_train: Training feature matrix.
        X_test: Test feature matrix.
        y_train: Training targets.
        y_test: Test targets.
        poly_name: Truthy to prefix the stored model name with "Polynomial".
            Only the label changes; the fit is identical.
        features: Value rendered in the model name as "<features> features".
            Only used for the label.
        cv: Cross-validation spec forwarded to ``cross_val_score`` (the outer
            evaluation loop). The estimator's own hyper-parameter search
            always uses 3 folds.

    Returns:
        tuple: ``(elast_net, results_dict)`` where ``elast_net`` is the fitted
        estimator and ``results_dict`` holds the model name, the selected
        alpha and l1 ratio, the length of ``coef_``, the train / test scores,
        the per-fold cross-validation scores with their mean and standard
        deviation, and the test-set RMSE.
    """
    # Model name: only the prefix differs between the two variants. A plain
    # truthiness test is used so that None / '' are not labelled "Polynomial".
    prefix = 'Polynomial ' if poly_name else ''
    name = '{}Elastic Net Regression Model ({})'.format(
        prefix, str(features) + ' features')

    # Cross-validate the (not yet fitted) estimator on the training data.
    elast_net = ElasticNetCV(n_alphas=100,
                             l1_ratio=np.linspace(0.01, 1, 20),
                             max_iter=1000000,
                             random_state=1,
                             cv=3)
    cv_scores = cross_val_score(elast_net, X_train, y_train, cv=cv)
    cv_r2_mean = np.mean(cv_scores)
    cv_r2_std = np.std(cv_scores)

    # Fit on the complete training set.
    elast_net.fit(X_train, y_train)

    # Length of the coefficient vector (coefficients shrunk to zero are
    # counted as well).
    coefs = len(elast_net.coef_)

    # Hyper-parameters selected by the internal cross-validation.
    best_alpha = elast_net.alpha_
    best_l1 = elast_net.l1_ratio_

    # Predictions on the held-out data.
    y_pred = elast_net.predict(X_test)

    # Performance on train and test data.
    r2_train = elast_net.score(X_train, y_train)
    r2_test = elast_net.score(X_test, y_test)
    rmse = np.sqrt(metrics.mean_squared_error(y_test, y_pred))

    # Keys are part of the caller-visible contract; keep them verbatim.
    results_dict = {
        'Model': name,
        'Alpha': best_alpha,
        'L1': best_l1,
        'No. of coefs:': coefs,
        'R^2 train': r2_train,
        'Cross-Validation R^2 scores': cv_scores,
        'Cross-Validation R^2 mean': cv_r2_mean,
        'Cross-Validation R^2 std': cv_r2_std,
        'R^2 test': r2_test,
        'RMSE': rmse
    }
    return elast_net, results_dict
# Validation / test mean squared error of the stacked regressor.
stack_val_mse = mean_squared_error(ytestv, stack_val_pred)
stack_test_mse = mean_squared_error(ytest, stack_test_pred)
# Report RMSE on train, validation and test (square root of each MSE).
print("RMSE using StackRegressor:\t{}\t{}\t{}\n".format(
    np.sqrt(stack_train_mse), np.sqrt(stack_val_mse), np.sqrt(stack_test_mse)))

# Plot the first 300 actual vs. predicted values.
df_rf = pd.DataFrame({'Actual': ytest, 'Predicted': stack_test_pred})
fig1 = pp.figure(figsize=(8, 6))
# DataFrame.plot() opens a figure of its own unless it is given an axes, which
# would leave fig1 blank and ignore its figsize; draw on fig1 explicitly.
df_rf.head(n=300).plot(ax=fig1.gca())
pp.legend()
pp.title("StackRegressor Actual v/s Predicted Annual Rainfall")
# Hide the tick marks on both axes.
pp.xticks(())
pp.yticks(())
pp.show()

# Side-by-side test-set scores of the three regressors.
print(rfreg.score(Xtest, ytest), elastic.score(Xtest, ytest),
      stack.score(Xtest, ytest))

# CLASSIFICATION & CLUSTERING METHODS
# Data splitting
X = np.array(raindf[['JAN-FEB', 'MAR-MAY', 'JUN-SEP', 'OCT-DEC']])
y = np.array(raindf['REGION'])
# Encode the region labels as integers; fit_transform fits the encoder itself,
# so no separate fit() call is needed.
le = preprocessing.LabelEncoder()
y = le.fit_transform(y)
# Project the four seasonal columns onto two principal components.
Xreduced = PCA(n_components=2).fit_transform(X)
results = pd.DataFrame(Xreduced, columns=['pca1', 'pca2'])
#%% #try elastic net #alpha equals lambda here lambda_grid = [0.01, 0.1 , 1, 10,100] l1_ratio_grid = [0.1,0.3,0.5,0.7,0.9] enet_CV = ElasticNetCV(l1_ratio=l1_ratio_grid,cv=3,n_jobs=-1,verbose=True) enet_CV.fit(train_X,train_Y) #%% #show enet_CV.score(test_X,test_Y) plt.plot(enet_CV.predict(test_X),test_Y,'o') #%% #try svr svr = SVR(kernel = 'rbf',C=1,cache_size=2000) SVR_params = { 'C' : [1e-1,1.0,1e2,1e3,1e4] } svr_rs = grid_search.RandomizedSearchCV(svr,SVR_params,verbose=True,n_jobs=-1) svr.fit(train_X[:,whichones[0]],train_Y) #%% #try bagging/boosting etc #rfr = RandomForestRegressor(n_estimators = 30,n_jobs = 2)
# Histogram of the target with 20 bins.
plt.hist(y, 20)
# Replace NaN with 0.0 (the default fill value). The second positional
# argument of np.nan_to_num is `copy`, not the fill value, so passing 0 there
# silently requested an in-place update of Xso.
Xs = np.nan_to_num(Xso)
fx = StandardScaler()
# Scaling is currently disabled; the raw NaN-free matrix is used as is.
X2 = Xs  #fx.fit_transform(Xs)
y = np.array(y).astype(float)
# Elastic net with non-negative coefficients, 10-fold CV over the l1_ratio
# candidates below. l1_ratio is passed by keyword and max_iter as an int,
# because current scikit-learn releases reject positional estimator
# arguments and a float iteration count.
reg = ElasticNetCV(
    l1_ratio=[.1, .7, .725, .75, .775, .8, .9, .95, .99, 1],
    cv=10,
    positive=True,
    max_iter=10000)  #(alpha=0.1,l1_ratio=0.7)#CV(cv=10)#ElasticNetCV(.7,cv=10,)
#reg = lgb.LGBMRegressor()
reg.fit(X2, y)
# Score on the same data the model was fitted on (in-sample).
print(Xs.shape, reg.score(X2, y))
# NOTE(review): newer matplotlib releases renamed this style to
# 'seaborn-v0_8-white' - confirm against the installed version.
plt.style.use('seaborn-white')
# Predicted (x axis) vs. actual (y axis) values.
plt.scatter(reg.predict(X2), y, s=10, alpha=0.5)
plt.ylim(-60, 60)
plt.xlim(-60, 60)
plt.xlabel('predicted margin')
plt.ylabel('actual margin')
#sorted([(i,n) for i,n in zip(reg.feature_importances_,exp_lbl) if 'LB' in n],reverse=True)
# Displays the selected l1_ratio when run interactively.
reg.l1_ratio_
# One "<pos>_<col>" label per (position, column) pair, positions outermost.
exp_lbl = [str(p) + '_' + str(s) for p in valid_pos for s in valid_col]
#%% #try elastic net #alpha equals lambda here lambda_grid = [0.01, 0.1 , 1, 10,100] l1_ratio_grid = [0.1,0.3,0.5,0.7,0.9] enet_CV = ElasticNetCV(l1_ratio=l1_ratio_grid,alphas=lambda_grid,cv=3,n_jobs=-1,verbose=True) enet_CV.fit(train_X,train_Y) #%% #show enet_CV.score(test_X,test_Y) plt.plot(enet_CV.predict(test_X),test_Y,'o') #%% #try svr svr = SVR(kernel = 'rbf',C=1,cache_size=2000) SVR_params = { 'C' : [1e-1,1.0,1e2,1e3,1e4] } svr_rs = grid_search.RandomizedSearchCV(svr,SVR_params,verbose=True,n_jobs=-1) svr.fit(train_X[:,whichones[0]],train_Y) #%% #try bagging/boosting etc #rfr = RandomForestRegressor(n_estimators = 30,n_jobs = 2)
md=dnn_reg(X_train,y_train,X_test,y_test)
reg_eval(X_test,y_test,md)

###Lasso CV regression
def reg_eval2(y_test,model):
    """Print MSE, R2 and explained variance of ``model`` on the test set.

    Predictions are made on the module-level ``X_test`` (it is not a
    parameter), so ``y_test`` must be the targets that belong to that matrix.

    Args:
        y_test: True target values for ``X_test``.
        model: Fitted estimator exposing ``predict``.
    """
    y_pred=model.predict(X_test)
    print("evaluation the results for model:",model)
    print("MSE:",mean_squared_error(y_test,y_pred))
    print("R2:",r2_score(y_test,y_pred))
    print("EVS:",explained_variance_score(y_test,y_pred))

lasso = LassoCV(cv=5, random_state=0,max_iter=10000)
lasso.fit(X_train,y_train)
reg_eval2(y_test,lasso)

#ElasticNet Regression
# NOTE(review): the `normalize` argument is not accepted by recent
# scikit-learn releases - confirm against the installed version.
ela = ElasticNetCV(l1_ratio=0.8,normalize=True,max_iter=5000,random_state=77)
ela.fit(X_train,y_train)
print("R square:",ela.score(X_test,y_test))
reg_eval2(y_test,ela)

#SVR Regression
from sklearn.svm import LinearSVR
LSVR=LinearSVR(epsilon=0.1,random_state=0, tol=1e-5,max_iter=10000)
# scaler=RobustScaler()
# pipe=Pipeline(steps=[("scaling",scaler),("rg",LSVR)])
LSVR.fit(X_train,y_train)
# The original call ended with an unbalanced ')' (a SyntaxError); removed.
reg_eval2(y_test,LSVR)
) #train the algorithm on training data and predict using the testing data y_predelast = elast.predict(X_test) print('Betas: ', list(zip(elast.coef_, X))) print('Beta0: %.2f' % elast.intercept_) #Beta0 # 5.1.4.1 Elastic Net Regression with crossvalidation to calculate optimal alpha elastcv = ElasticNetCV(cv=5, random_state=42) pred_elastcv = elastcv.fit(X_train, y_train).predict( X_test ) #train the algorithm on training data and predict using the testing data y_predelastcv = elastcv.predict(X_test) print('Optimal Aplha Value: ', elastcv.alpha_) print('Betas: ', list(zip(elastcv.coef_, X))) print('Beta0: %.2f' % elastcv.intercept_) #Beta0 print('R²: %.2f' % elastcv.score(X, Y)) #R² print('MSE: %.2f' % mean_squared_error(y_test, y_predelastcv)) # 5.1.5 Robust regression '''Robust regression aims to fit a regression model in the presence of corrupt data: either outliers, or error in the model HuberRegressor should be faster than RANSAC and Theil Sen unless the number of samples are very large, i.e n_samples >> n_features. This is because RANSAC and Theil Sen fit on smaller subsets of the data. However, both Theil Sen and RANSAC are unlikely to be as robust as HuberRegressor for the default parameters. RANSAC is faster than Theil Sen and scales much better with the number of samples. RANSAC will deal better with large outliers in the y direction (most common situation).
#%% interesting_ones = ['G13','G14','G15','G19','G21'] r2_mfcc = [] r2_stft = [] for chan in interesting_ones: y = Y[:,electrode_names.index(chan)] train_X,test_X,train_Y,test_Y = train_test_split(np.hstack([mfcc_X,X]),y,test_size=0.3) mfcctrain_X = train_X[:,:325] train_X = train_X[:,325:] l1_ratio_grid = [0.1,0.3,0.5,0.7,0.9] enet_CV = ElasticNetCV(l1_ratio=l1_ratio_grid,n_jobs=-1,verbose=True) enet_CV.fit(train_X,train_Y) r2_stft.append(enet_CV.score(test_X[:,325:],test_Y)) enet_CV.fit(mfcctrain_X,train_Y) r2_mfcc.append(enet_CV.score(test_X[:,:325],test_Y)) #%% #%for standardizing in lagged stimuli space scaler = preprocessing.StandardScaler() scaler.fit(X) X = scaler.transform(X) #add intercept #X = np.hstack((np.ones(X.shape[0])[:,None],X)) yscaler = preprocessing.StandardScaler()