def rfecv_sklearn_example(image="rfecv_sklearn_example.png"):
    """Render the RFECV example on a synthetic classification problem.

    A 1000-sample, 25-feature, 8-class dataset is generated with
    ``make_classification`` (fixed ``random_state=0``), an ``RFECV``
    visualizer wrapping a linear ``SVC`` is fitted on it, and the result is
    handed to ``poof`` with an output path under ``IMAGES``.

    Parameters
    ----------
    image : str
        File name joined onto ``IMAGES`` to form the output path.
    """
    dataset_options = dict(
        n_samples=1000,
        n_features=25,
        n_informative=3,
        n_redundant=2,
        n_repeated=0,
        n_classes=8,
        n_clusters_per_class=1,
        random_state=0,
    )
    X, y = make_classification(**dataset_options)

    _fig, axes = plt.subplots()
    estimator = SVC(kernel='linear', C=1)
    visualizer = RFECV(estimator, ax=axes)
    visualizer.fit(X, y)

    destination = os.path.join(IMAGES, image)
    visualizer.poof(outpath=destination)
def rfecv_credit_example(image="rfecv_credit.png"):
    """Render the RFECV example on the credit data set.

    Reads ``credit/credit.csv`` from under ``DATA``, uses every column except
    ``"default"`` as a predictor and ``"default"`` as the target, fits an
    ``RFECV`` visualizer around a ``RandomForestClassifier`` with 5-fold
    stratified cross-validation and ``f1_weighted`` scoring, and passes an
    output path under ``IMAGES`` to ``poof``.

    Parameters
    ----------
    image : str
        File name joined onto ``IMAGES`` to form the output path.
    """
    csv_path = os.path.join(DATA, "credit", "credit.csv")
    frame = pd.read_csv(csv_path)

    label_col = "default"
    predictor_cols = [name for name in frame.columns if name != label_col]
    X, y = frame[predictor_cols], frame[label_col]

    _fig, axes = plt.subplots()
    folds = StratifiedKFold(5)
    forest = RandomForestClassifier()
    visualizer = RFECV(forest, ax=axes, cv=folds, scoring='f1_weighted')
    visualizer.fit(X, y)
    visualizer.poof(outpath=os.path.join(IMAGES, image))
def fazer_selecao_features_rfe(modelo):
    """Run RFECV feature selection with ``modelo`` and return the reduced data.

    The predictors are every column of the module-level ``dados_completo``
    except ``'classe'``; ``'classe'`` itself is the target. Cross-validation
    and scoring come from the module-level ``kfold`` and ``scoring``.

    The per-feature rank and the selection mask are printed next to the
    feature names. The names are taken from the predictor frame (not from
    ``dados_completo``) so that they line up with ``ranking_`` / ``support_``
    even when ``'classe'`` is not the last column.

    Parameters
    ----------
    modelo
        Estimator handed to ``RFECV``.

    Returns
    -------
    The result of ``rfe.transform`` applied to the predictor frame.
    """
    # Build the predictor frame once: it is what the selector is fitted on,
    # what the printed names must describe, and what gets transformed.
    X = dados_completo.drop(['classe'], axis=1)
    y = dados_completo['classe'].values
    features = X.columns

    rfe = RFECV(modelo, cv=kfold, scoring=scoring)
    rfe.fit(X, y)

    # The return value of poof() is printed as well; kept so the console
    # output stays the same as before.
    print(rfe.poof())

    print("Caraterísticas ordenadas pelo rank RFE:")
    print(sorted(
        (round(rank, 4), name) for rank, name in zip(rfe.ranking_, features)
    ))

    # Pairs of (selected?, feature name), sorted so unselected features
    # (False) come first.
    ranking = sorted(zip(rfe.support_, features))
    print("Características selecionadas", ranking)

    return rfe.transform(X)
def fazer_selecao_features_rfe():
    """Run RFECV with a random forest on the training data.

    Reads the module-level ``X_treino``, ``Y_treino``, ``random_state`` and
    ``kfold``. Prints the result of ``poof()``, the (rank, feature) pairs in
    sorted order, and the sorted (selected?, feature) pairs.

    Returns
    -------
    The result of the fitted selector's ``transform`` applied to ``X_treino``.
    """
    column_names = X_treino.columns

    forest = RandomForestClassifier(random_state=random_state, oob_score=True)
    selector = RFECV(forest, cv=kfold, scoring='accuracy')

    # ravel() flattens the target values to 1-D before fitting.
    targets = Y_treino.values.ravel()
    selector.fit(X_treino, targets)

    shown = selector.poof()
    print(shown)

    print("Caraterísticas ordenadas pelo rank RFE:")
    rank_pairs = sorted(
        (round(rank, 4), name)
        for rank, name in zip(selector.ranking_, column_names)
    )
    print(rank_pairs)

    ranking = sorted(zip(selector.support_, column_names))
    print("Características selecionadas", ranking)

    return selector.transform(X_treino)
def rfecv_sklearn_example(image="rfecv_sklearn_example.png"):
    """Fit RFECV with a linear SVC on generated data and hand it to ``poof``.

    The data comes from ``make_classification`` with 1000 samples, 25
    features (3 informative, 2 redundant, none repeated), 8 classes with one
    cluster each, and ``random_state=0``.

    Parameters
    ----------
    image : str
        File name appended to ``IMAGES`` to build the ``outpath`` argument.
    """
    samples, labels = make_classification(
        n_samples=1000,
        n_features=25,
        n_informative=3,
        n_redundant=2,
        n_repeated=0,
        n_classes=8,
        n_clusters_per_class=1,
        random_state=0,
    )

    _figure, plot_axes = plt.subplots()

    linear_svc = SVC(kernel='linear', C=1)
    selector_viz = RFECV(linear_svc, ax=plot_axes)
    selector_viz.fit(samples, labels)

    out_file = os.path.join(IMAGES, image)
    selector_viz.poof(outpath=out_file)
def feature_selection(model, features, X, y):
    """Fit and display an RFECV visualizer for ``model`` on ``X`` / ``y``.

    Uses 5-fold ``StratifiedKFold`` cross-validation and ``f1_weighted``
    scoring, then calls ``poof()`` on the fitted visualizer.

    Parameters
    ----------
    model
        Estimator handed to ``RFECV``.
    features
        Not read by this function; kept in the signature so existing callers
        keep working.
    X
        Feature data passed to ``fit``.
    y
        Target data passed to ``fit``.

    Returns
    -------
    None
    """
    # Imported lazily so these packages are only needed when the function is
    # actually called.
    from sklearn.model_selection import StratifiedKFold
    from yellowbrick.features import RFECV

    cv = StratifiedKFold(5)
    oz = RFECV(model, cv=cv, scoring='f1_weighted')
    oz.fit(X, y)
    oz.poof()
def fazer_selecao_features_rfe():
    """Run RFECV with a tuned random forest on the training data.

    Reads the module-level ``X_treino``, ``Y_treino``, ``random_state`` and
    ``kfold``. The forest uses 250 entropy-criterion trees with
    ``max_depth=75``, ``max_features='log2'``, ``min_samples_leaf=1``,
    ``min_samples_split=2`` and out-of-bag scoring enabled.

    Prints the result of ``poof()``, the sorted (rank, feature) pairs and the
    sorted (selected?, feature) pairs.

    Returns
    -------
    The result of the fitted selector's ``transform`` applied to ``X_treino``.
    """
    feature_names = X_treino.columns

    forest_options = dict(
        random_state=random_state,
        oob_score=True,
        n_estimators=250,
        criterion='entropy',
        max_depth=75,
        max_features='log2',
        min_samples_leaf=1,
        min_samples_split=2,
    )
    selector = RFECV(
        RandomForestClassifier(**forest_options),
        cv=kfold,
        scoring='accuracy',
    )
    selector.fit(X_treino, Y_treino.values.ravel())

    poof_result = selector.poof()
    print(poof_result)

    print("Caraterísticas ordenadas pelo rank RFE:")
    by_rank = sorted(
        (round(position, 4), label)
        for position, label in zip(selector.ranking_, feature_names)
    )
    print(by_rank)

    ranking = sorted(zip(selector.support_, feature_names))
    print("Características selecionadas", ranking)

    return selector.transform(X_treino)
# %% feat = feature_names[feat][np.mean(abs(shap_values), axis=0) > 1] print(feat) X = X[feat] # %% test_size = 0.33 cv = 3 Xt, Xv, yt, yv = \ sklearn.model_selection.train_test_split( X, y, test_size=test_size, stratify=y, random_state=seed) viz = RFECV(XGBClassifier( max_depth=7, n_estimators=1000, scale_pos_weight=50000, subsample=0.5, colsample_bylevel=0.5, colsample_bytree=0.5, seed=seed, ), cv=cv) viz.fit(Xv, yv) viz.poof() # %% feat = feat[viz.ranking_ < 5] print(feat) X = X[feat] # %% test_size = 0.33 cv = 3