Пример #1
0
def rfecv_sklearn_example(image="rfecv_sklearn_example.png"):
    """
    Draw the RFECV visualizer for a linear SVC on a synthetic dataset.

    The data comes from ``make_classification``: 1000 samples, 25 features
    (3 informative, 2 redundant, none repeated) and 8 classes with a single
    cluster each, generated with a fixed random state.

    Parameters
    ----------
    image : str
        File name joined onto ``IMAGES`` and handed to ``poof`` as the
        output path.
    """
    dataset_options = dict(
        n_samples=1000,
        n_features=25,
        n_informative=3,
        n_redundant=2,
        n_repeated=0,
        n_classes=8,
        n_clusters_per_class=1,
        random_state=0,
    )
    X, y = make_classification(**dataset_options)

    # Only the axes object is needed; the figure handle is discarded.
    axes = plt.subplots()[1]

    estimator = SVC(kernel='linear', C=1)
    oz = RFECV(estimator, ax=axes)
    oz.fit(X, y)

    destination = os.path.join(IMAGES, image)
    oz.poof(outpath=destination)
Пример #2
0
def rfecv_credit_example(image="rfecv_credit.png"):
    """
    Draw the RFECV visualizer for a random forest on the credit dataset.

    Reads ``credit/credit.csv`` from under ``DATA``, uses the ``"default"``
    column as the target and every other column as a predictor, then runs
    RFECV with 5-fold stratified cross-validation scored by weighted F1.

    Parameters
    ----------
    image : str
        File name joined onto ``IMAGES`` and handed to ``poof`` as the
        output path.
    """
    target = "default"
    csv_path = os.path.join(DATA, "credit", "credit.csv")
    credit = pd.read_csv(csv_path)

    # Everything except the target column is treated as a predictor.
    predictors = [name for name in credit.columns if name != target]
    X = credit[predictors]
    y = credit[target]

    axes = plt.subplots()[1]
    folds = StratifiedKFold(5)
    forest = RandomForestClassifier()

    oz = RFECV(forest, ax=axes, cv=folds, scoring='f1_weighted')
    oz.fit(X, y)

    destination = os.path.join(IMAGES, image)
    oz.poof(outpath=destination)
Пример #3
0
def fazer_selecao_features_rfe(modelo):
    """
    Run cross-validated recursive feature elimination and report the result.

    The predictors are every column of the module-level ``dados_completo``
    frame except ``'classe'``, which is used as the target. The RFECV
    visualizer is built with the module-level ``kfold`` and ``scoring``
    settings, fitted, drawn, and its rank/support per feature is printed.

    Parameters
    ----------
    modelo : estimator
        Model handed to ``RFECV`` as the estimator to eliminate features with.

    Returns
    -------
    The result of ``rfe.transform`` applied to the predictor frame.
    """
    # Build the predictor frame once and take the feature names from it.
    # ``ranking_`` and ``support_`` describe the fitted columns, so names
    # taken from the full frame (which still contains 'classe') would be
    # paired with the wrong entries unless 'classe' was the last column.
    X = dados_completo.drop(['classe'], axis=1)
    features = X.columns
    rfe = RFECV(modelo, cv=kfold, scoring=scoring)

    rfe.fit(X, dados_completo['classe'].values)
    print(rfe.poof())
    print("Caraterísticas ordenadas pelo rank RFE:")
    print(sorted(zip((round(rank, 4) for rank in rfe.ranking_), features)))
    ranking = sorted(zip(rfe.support_, features))
    print("Características selecionadas", ranking)
    return rfe.transform(X)
Пример #4
0
def rfecv_credit_example(image="rfecv_credit.png"):
    """
    Plot recursive feature elimination (with CV) on the credit data.

    The CSV at ``DATA/credit/credit.csv`` is loaded; ``"default"`` is the
    target column and the remaining columns are the features. A default
    ``RandomForestClassifier`` is evaluated with ``StratifiedKFold(5)`` and
    the ``'f1_weighted'`` scorer.

    Parameters
    ----------
    image : str
        File name, relative to ``IMAGES``, passed to ``poof`` as ``outpath``.
    """
    data = pd.read_csv(os.path.join(DATA, "credit", "credit.csv"))

    target = "default"
    feature_columns = []
    for column in data.columns:
        if column != target:
            feature_columns.append(column)

    X, y = data[feature_columns], data[target]

    _figure, ax = plt.subplots()
    oz = RFECV(
        RandomForestClassifier(),
        ax=ax,
        cv=StratifiedKFold(5),
        scoring='f1_weighted',
    )
    oz.fit(X, y)
    oz.poof(outpath=os.path.join(IMAGES, image))
def fazer_selecao_features_rfe():
    """
    Run RFECV with a random forest on the training data and report it.

    Uses the module-level ``X_treino`` / ``Y_treino`` frames, the
    ``random_state`` seed and the ``kfold`` splitter. Features are scored
    by accuracy. The visualizer is drawn, then the rank and support of each
    column are printed.

    Returns
    -------
    The result of ``rfe.transform`` applied to ``X_treino``.
    """
    column_names = X_treino.columns

    forest = RandomForestClassifier(random_state=random_state, oob_score=True)
    rfe = RFECV(forest, cv=kfold, scoring='accuracy')

    # The target frame is flattened to a 1-D array before fitting.
    rfe.fit(X_treino, Y_treino.values.ravel())
    print(rfe.poof())

    print("Caraterísticas ordenadas pelo rank RFE:")
    ranked = [
        (round(rank, 4), name)
        for rank, name in zip(rfe.ranking_, column_names)
    ]
    ranked.sort()
    print(ranked)

    ranking = list(zip(rfe.support_, column_names))
    ranking.sort()
    print("Características selecionadas", ranking)

    return rfe.transform(X_treino)
Пример #6
0
def rfecv_sklearn_example(image="rfecv_sklearn_example.png"):
    """
    Plot RFECV for a linear-kernel SVC on generated classification data.

    ``make_classification`` produces 1000 samples with 25 features, of which
    3 are informative and 2 redundant, spread over 8 classes (one cluster
    per class) using ``random_state=0``.

    Parameters
    ----------
    image : str
        File name, relative to ``IMAGES``, passed to ``poof`` as ``outpath``.
    """
    X, y = make_classification(
        n_samples=1000, n_features=25,
        n_informative=3, n_redundant=2, n_repeated=0,
        n_classes=8, n_clusters_per_class=1,
        random_state=0,
    )

    _figure, ax = plt.subplots()

    linear_svc = SVC(kernel='linear', C=1)
    oz = RFECV(linear_svc, ax=ax)
    oz.fit(X, y)

    outpath = os.path.join(IMAGES, image)
    oz.poof(outpath=outpath)
Пример #7
0
def feature_selection(model, features, X, y):
    """
    Fit and draw a yellowbrick RFECV visualizer for ``model`` on ``X``/``y``.

    Cross-validation uses ``StratifiedKFold(5)`` and the ``'f1_weighted'``
    scorer.

    Parameters
    ----------
    model : estimator
        Model handed to ``RFECV`` as the estimator to eliminate features with.
    features : object
        Not used by this function; kept so existing call sites keep working.
    X : array-like
        Predictor data passed to ``fit``.
    y : array-like
        Target data passed to ``fit``.

    Returns
    -------
    The fitted ``RFECV`` visualizer, so callers can inspect the selection
    after the plot has been drawn.
    """
    from sklearn.model_selection import StratifiedKFold
    from yellowbrick.features import RFECV

    cv = StratifiedKFold(5)
    oz = RFECV(model, cv=cv, scoring='f1_weighted')

    oz.fit(X, y)
    oz.poof()
    return oz
def fazer_selecao_features_rfe():
    """
    Run RFECV with a tuned random forest on the training data and report it.

    Uses the module-level ``X_treino`` / ``Y_treino`` frames, the
    ``random_state`` seed and the ``kfold`` splitter, scoring by accuracy.
    The visualizer is drawn, then the rank and support of each column are
    printed.

    Returns
    -------
    The result of ``rfe.transform`` applied to ``X_treino``.
    """
    column_names = X_treino.columns

    forest_options = dict(
        random_state=random_state,
        oob_score=True,
        n_estimators=250,
        criterion='entropy',
        max_depth=75,
        max_features='log2',
        min_samples_leaf=1,
        min_samples_split=2,
    )
    forest = RandomForestClassifier(**forest_options)
    rfe = RFECV(forest, cv=kfold, scoring='accuracy')

    # The target frame is flattened to a 1-D array before fitting.
    rfe.fit(X_treino, Y_treino.values.ravel())
    print(rfe.poof())

    print("Caraterísticas ordenadas pelo rank RFE:")
    ranked = [
        (round(rank, 4), name)
        for rank, name in zip(rfe.ranking_, column_names)
    ]
    ranked.sort()
    print(ranked)

    ranking = list(zip(rfe.support_, column_names))
    ranking.sort()
    print("Características selecionadas", ranking)

    return rfe.transform(X_treino)
Пример #9
0
# %%
# Narrow the current feature selection to the entries whose mean absolute
# SHAP value (mean taken along axis 0) is greater than 1, then subset X.
# NOTE(review): `feat`, `feature_names`, `shap_values`, `X`, `y` and `seed`
# are defined outside this excerpt; presumably shap_values is laid out as
# (n_samples, n_features) — confirm.
feat = feature_names[feat][np.mean(abs(shap_values), axis=0) > 1]
print(feat)
X = X[feat]

# %%
# Stratified split that holds out 33% of the rows, seeded with `seed`.
test_size = 0.33
cv = 3
Xt, Xv, yt, yv = \
    sklearn.model_selection.train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=seed)
# RFECV visualizer around an XGBoost classifier, using `cv` as its
# cross-validation setting.
viz = RFECV(XGBClassifier(
    max_depth=7,
    n_estimators=1000,
    scale_pos_weight=50000,
    subsample=0.5,
    colsample_bylevel=0.5,
    colsample_bytree=0.5,
    seed=seed,
),
            cv=cv)
# NOTE(review): the visualizer is fitted on the held-out split (Xv, yv);
# Xt / yt are not used anywhere in this excerpt — confirm this is intended.
viz.fit(Xv, yv)
viz.poof()

# %%
# Keep only the features whose RFE rank is below 5, then subset X again.
feat = feat[viz.ranking_ < 5]
print(feat)
X = X[feat]

# %%
# Split / CV settings are assigned again here; presumably the cell continues
# past the end of this excerpt.
test_size = 0.33
cv = 3