def feature_selection(x_train, y_train, k=50):
    """Plot the training-feature correlations, then select features.

    Args:
        x_train: Training features; handed to ``plot_corr`` and to
            ``percentile_k_features``.
        y_train: Training target, forwarded to ``percentile_k_features``.
        k: Selection parameter forwarded to ``percentile_k_features``
            (presumably a percentile -- confirm against that helper).

    Returns:
        Whatever ``percentile_k_features`` returns for the given inputs.
    """
    # The plot is a side effect only; its return value is not used.
    plot_corr(x_train)
    return percentile_k_features(x_train, y_train, k)
def create_stats(X_train, X_test, y_train, y_test, enc="labelencoder"):
    """Build one score table comparing three models with and without
    feature selection.

    ``linear_model``, ``lasso`` and ``ridge`` are each run twice: once on
    all columns and once on the columns picked by
    ``percentile_k_features(X_train, y_train, k=50)``. Only the third
    element of each helper's returned 3-tuple (the scores) is kept.

    Args:
        X_train: Training features; must support column selection with
            the result of ``percentile_k_features``.
        X_test: Test features, indexed with the same selection.
        y_train: Training target.
        y_test: Test target.
        enc: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        The concatenated score frames, one labelled row per run, with the
        ``mse`` column folded into ``rmse`` and then removed.
    """
    # Runs on the full feature set. The helpers are invoked in a fixed
    # order in case they print or plot as a side effect.
    _, _, lin_all = linear_model(X_train, X_test, y_train, y_test, 0.01)
    _, _, lasso_all = lasso(X_train, X_test, y_train, y_test)
    _, _, ridge_all = ridge(X_train, X_test, y_train, y_test)

    # Same models again, restricted to the selected columns.
    chosen = percentile_k_features(X_train, y_train, k=50)
    train_subset = X_train[chosen]
    test_subset = X_test[chosen]
    _, _, lin_sel = linear_model(train_subset, test_subset, y_train, y_test, 0.01)
    _, _, lasso_sel = lasso(train_subset, test_subset, y_train, y_test)
    _, _, ridge_sel = ridge(train_subset, test_subset, y_train, y_test)

    # Keep each row label next to the frame it describes so the two
    # sequences cannot drift apart.
    labelled_scores = [
        ('linear_model_scores', lin_all),
        ('linear_model_scores_features', lin_sel),
        ('lasso_scores', lasso_all),
        ('lasso_scores_features', lasso_sel),
        ('ridge_scores', ridge_all),
        ('ridge_scores_features', ridge_sel),
    ]
    complete_stats = pd.concat([scores for _, scores in labelled_scores])
    complete_stats.index = [label for label, _ in labelled_scores]

    # Merge the two error columns into one: missing values count as 0 and
    # the sum lands in ``rmse``. Presumably each model reports only one of
    # the two metrics -- confirm against the model helpers.
    mse_filled = complete_stats.mse.fillna(0)
    rmse_filled = complete_stats.rmse.fillna(0)
    complete_stats.rmse = mse_filled + rmse_filled
    return complete_stats.drop(['mse'], axis=1)
def feature_selection(X, y, k=50):
    """Select features by delegating to ``percentile_k_features``.

    Args:
        X: Feature data, forwarded unchanged.
        y: Target data, forwarded unchanged.
        k: Selection parameter, forwarded unchanged (presumably a
            percentile -- confirm against the helper).

    Returns:
        The result of ``percentile_k_features(X, y, k)``.
    """
    return percentile_k_features(X, y, k)
def feature_selection(x_train, y_train, k=50):
    """Plot correlations of features plus target, then select features.

    Args:
        x_train: Training features.
        y_train: Training target; joined column-wise onto ``x_train``
            for the correlation plot.
        k: Selection parameter forwarded to ``percentile_k_features``
            (presumably a percentile -- confirm against that helper).

    Returns:
        The result of ``percentile_k_features(x_train, y_train, k)``.
    """
    # Put the target next to the features so the plot covers both.
    features_with_target = pd.concat([x_train, y_train], axis=1)
    plot_corr(features_with_target)
    return percentile_k_features(x_train, y_train, k)
def pick_features(X, y, k=50):
    """Return the features chosen by ``percentile_k_features``.

    Args:
        X: Feature data, forwarded unchanged.
        y: Target data, forwarded unchanged.
        k: Selection parameter, forwarded unchanged (presumably a
            percentile -- confirm against the helper).

    Returns:
        The result of ``percentile_k_features(X, y, k)``.
    """
    return percentile_k_features(X, y, k)
def feature_selection(X, y, k=50):
    """Plot correlations of features plus target, then select features.

    Args:
        X: Feature data.
        y: Target data; joined column-wise onto ``X`` for the
            correlation plot.
        k: Selection parameter forwarded to ``percentile_k_features``
            (presumably a percentile -- confirm against that helper).
            Defaults to 50.

    Returns:
        The result of ``percentile_k_features(X, y, k=k)``.
    """
    plot_corr(pd.concat([X, y], axis=1))
    # Forward the caller's ``k``; a hard-coded 50 here used to make the
    # parameter a silent no-op.
    return percentile_k_features(X, y, k=k)
def feature_selection(x_train, y_train, k=50):
    """Plot the training-feature correlations, then select features.

    Args:
        x_train: Training features; handed to ``plot_corr`` and to
            ``percentile_k_features``.
        y_train: Training target, forwarded to ``percentile_k_features``.
        k: Selection parameter forwarded to ``percentile_k_features``
            (presumably a percentile -- confirm against that helper).
            Defaults to 50.

    Returns:
        The result of ``percentile_k_features(x_train, y_train, k=k)``.
    """
    # 11 is the second positional argument of plot_corr (presumably the
    # figure size -- confirm against plot_corr).
    plot_corr(x_train, 11)
    # Forward the caller's ``k``; a hard-coded 50 here used to make the
    # parameter a silent no-op.
    return percentile_k_features(x_train, y_train, k=k)
def feature_selection(X, y, k=50):
    """Select features by delegating to ``percentile_k_features``.

    Args:
        X: Feature data, forwarded unchanged.
        y: Target data, forwarded unchanged.
        k: Selection parameter, forwarded unchanged (presumably a
            percentile -- confirm against the helper).

    Returns:
        The result of ``percentile_k_features(X, y, k)``.
    """
    selected = percentile_k_features(X, y, k)
    return selected
def feature_selection(x_train, y_train, k=50):
    """Plot the training-feature correlations, then select features.

    Args:
        x_train: Training features; handed to ``plot_corr`` and to
            ``percentile_k_features``.
        y_train: Training target, forwarded to ``percentile_k_features``.
        k: Selection parameter forwarded to ``percentile_k_features``
            (presumably a percentile -- confirm against that helper).
            Defaults to 50.

    Returns:
        The result of ``percentile_k_features(x_train, y_train, k)``.
    """
    # Plot the data that was passed in. This previously read a
    # module-level ``df`` that is not a parameter, so the function only
    # worked when that global happened to exist.
    # NOTE(review): if the plot is meant to include the target column,
    # pass a frame that contains it -- confirm the intent.
    # 11 is the second positional argument of plot_corr (presumably the
    # figure size -- confirm against plot_corr).
    plot_corr(x_train, 11)
    # Forward the caller's ``k``; a hard-coded 50 here used to make the
    # parameter a silent no-op.
    return percentile_k_features(x_train, y_train, k)
def feature_selection(X, y, k=50):
    """Plot the feature correlations, then select features.

    Args:
        X: Feature data; handed to ``plot_corr`` and to
            ``percentile_k_features``.
        y: Target data, forwarded to ``percentile_k_features``.
        k: Selection parameter forwarded to ``percentile_k_features``
            (presumably a percentile -- confirm against that helper).

    Returns:
        The result of ``percentile_k_features(X, y, k)``.
    """
    # The plot is a side effect only; its return value is not used.
    plot_corr(X)
    chosen = percentile_k_features(X, y, k)
    return chosen