def runFeatures_KNN(i, numberOfFeature, X_train, Y_train, X_test, Y_test):
    print("run feature method")
    print("Number of features Selected KNN : ", numberOfFeature)
    KNN = KNeighborsClassifier(n_neighbors=8, p=3)
    # cv=0 disables cross-validation; candidate subsets are scored on the training data.
    sfs1 = sfs(KNN,
               k_features=numberOfFeature,
               forward=True,
               floating=False,
               verbose=2,
               scoring='accuracy',
               cv=0,
               n_jobs=-1)
    sfs1.fit(X_train, Y_train)
    cols = sfs1.k_feature_idx_
    print('The indices of best features KNN are: ', cols, '\n')
    str1 = ','.join(str(e) for e in cols)
    X_train = sfs1.transform(X_train)
    X_test = sfs1.transform(X_test)
    filename = 'saved_models/SFS_KNN.pkl'
    pickle.dump(sfs1, open(filename, 'wb'))
    train_acc = KNN_evaluation_procedure(KNN, X_train, Y_train, X_test, Y_test)
    X_valid, Y_valid = SFS_validate.read_features(i)
    sfs1 = pickle.load(open(filename, 'rb'))
    X_valid = sfs1.transform(X_valid)
    val_acc = SFS_validate.validation_procedure_KNN(X_valid, Y_valid)
    return str1, train_acc, val_acc
def do_feature_selection(model, trainAndValidation, trainAndValidation_y,
                         minFeatures, maxFeatures, mainFeatures,
                         fixed_features, focal_class):
    # Score candidate subsets on a predefined hold-out split rather than k-fold CV.
    validation_indices = trainAndValidation[trainAndValidation.set_annotation == 'validation'].index
    validSet = PredefinedHoldoutSplit(validation_indices)
    # Binarize the target: 1 for the focal class, 0 for everything else.
    trainAndValidation_y = np.where(trainAndValidation_y == focal_class, 1, 0)
    X, y = trainAndValidation[mainFeatures], trainAndValidation_y
    results = {}
    # Note: range() is exclusive, so maxFeatures itself is never tried.
    for totFeatures in range(minFeatures, maxFeatures):
        # Build step forward feature selection
        curSFS = sfs(model,
                     k_features=totFeatures,
                     forward=True,
                     verbose=0,  # 0: no output; 1: number of features in current set;
                                 # 2: detailed logging incl. timestamp and CV scores at each step.
                     scoring=auc_scorer,  # e.g. 'roc_auc'
                     cv=validSet,
                     n_jobs=1,
                     fixed_features=fixed_features)
        curSFS = curSFS.fit(X, y)
        feat_cols = list(curSFS.k_feature_idx_)
        sel = [mainFeatures[i] for i in feat_cols]
        key = ",".join(sorted(sel))
        if key in results:
            print("error: duplicate feature subset", key)
        results[key] = curSFS.k_score_
    return results
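# A minimal, hypothetical sketch of how do_feature_selection might be invoked.
# auc_scorer is only referenced above, never defined; the definition below, the
# toy frame, and the 'set_annotation' values are assumptions for illustration.
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer, roc_auc_score
from mlxtend.evaluate import PredefinedHoldoutSplit
from mlxtend.feature_selection import SequentialFeatureSelector as sfs

auc_scorer = make_scorer(roc_auc_score)  # assumed; the original only uses the name

rng = np.random.default_rng(0)
frame = pd.DataFrame(rng.normal(size=(100, 3)), columns=['f1', 'f2', 'f3'])
frame['set_annotation'] = ['train'] * 80 + ['validation'] * 20
labels = rng.choice(['pos', 'neg'], size=100)

results = do_feature_selection(LogisticRegression(), frame, labels,
                               minFeatures=1, maxFeatures=3,
                               mainFeatures=['f1', 'f2', 'f3'],
                               fixed_features=None, focal_class='pos')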
def forward_feature_selection(x_data, y_data, n_select):
    print("Applying forward feature selection to numerical data")
    print(f"cat variables before forward feature selection {x_data.select_dtypes(include='object').shape}")
    print(f"numeric variables before forward feature selection {x_data.select_dtypes(include='number').shape}")
    num_cols = x_data.select_dtypes(include='number').columns
    temp = x_data[num_cols]
    sfsf = sfs(RandomForestRegressor(n_jobs=5),
               k_features=n_select,
               forward=True,
               floating=False,
               verbose=2,
               cv=3,
               scoring='r2')
    sfsf.fit(temp, y_data)
    idx = list(sfsf.k_feature_idx_)
    cols_to_keep = num_cols[idx]
    cols_to_drop = [x for x in num_cols if x not in cols_to_keep]
    x_data.drop(labels=cols_to_drop, axis=1, inplace=True)
    print(f"cat variables after forward feature selection {x_data.select_dtypes(include='object').columns}")
    print(f"numeric variables after forward feature selection {x_data.select_dtypes(include='number').columns}")
    return x_data
def runFeatures_SVM(i, numberOfFeature, X_train, Y_train, X_test, Y_test):
    print("Number of features Selected SVM : ", numberOfFeature)
    SVC1 = SVC(kernel='linear', probability=True, random_state=0)
    sfs1 = sfs(SVC1,
               k_features=numberOfFeature,
               forward=True,
               floating=False,
               verbose=2,
               scoring='accuracy',
               cv=0,
               n_jobs=-1)
    sfs1.fit(X_train, Y_train)
    cols = sfs1.k_feature_idx_
    print('The indices of best features SVM are: ', cols, '\n')
    str1 = ','.join(str(e) for e in cols)
    X_train = sfs1.transform(X_train)
    X_test = sfs1.transform(X_test)
    filename = 'saved_models/SFS_SVM.pkl'
    pickle.dump(sfs1, open(filename, 'wb'))
    train_acc = SVM_evaluation_procedure(SVC1, X_train, Y_train, X_test, Y_test)
    X_valid, Y_valid = SFS_validate.read_features(i)
    sfs1 = pickle.load(open(filename, 'rb'))
    X_valid = sfs1.transform(X_valid)
    val_acc = SFS_validate.validation_procedure_SVM(X_valid, Y_valid)
    return str1, train_acc, val_acc
def runFeatures_LR(i, numberOfFeature, X_train, Y_train, X_test, Y_test):
    print("Number of features Selected LR: ", numberOfFeature)
    # liblinear is required for an L1 penalty; the default lbfgs solver does not support it.
    LR = LogisticRegression(penalty='l1', solver='liblinear', tol=0.1, random_state=12)
    sfs1 = sfs(LR,
               k_features=numberOfFeature,
               forward=True,
               floating=False,
               verbose=2,
               scoring='accuracy',
               cv=0,
               n_jobs=-1)
    sfs1.fit(X_train, Y_train)
    cols = sfs1.k_feature_idx_
    print('The indices of best features LR are: ', cols, '\n')
    str1 = ','.join(str(e) for e in cols)
    X_train = sfs1.transform(X_train)
    X_test = sfs1.transform(X_test)
    filename = 'saved_models/SFS_LR.pkl'
    pickle.dump(sfs1, open(filename, 'wb'))
    train_acc = LR_evaluation_procedure(LR, X_train, Y_train, X_test, Y_test)
    X_valid, Y_valid = SFS_validate.read_features(i)
    sfs1 = pickle.load(open(filename, 'rb'))
    X_valid = sfs1.transform(X_valid)
    val_acc = SFS_validate.validation_procedure_LR(X_valid, Y_valid)
    return str1, train_acc, val_acc
def runFeatures_RF(i, numberOfFeature, X_train, Y_train, X_test, Y_test):
    print("Number of features Selected RF : ", numberOfFeature)
    RF = RandomForestClassifier(n_estimators=100, random_state=1, max_features='log2')
    sfs1 = sfs(RF,
               k_features=numberOfFeature,
               forward=True,
               floating=False,
               verbose=2,
               scoring='accuracy',
               cv=0,
               n_jobs=-1)
    sfs1.fit(X_train, Y_train)
    cols = sfs1.k_feature_idx_
    print('The indices of best features RF are: ', cols, '\n')
    str1 = ','.join(str(e) for e in cols)
    X_train = sfs1.transform(X_train)
    X_test = sfs1.transform(X_test)
    filename = 'saved_models/SFS_RF.pkl'
    pickle.dump(sfs1, open(filename, 'wb'))
    train_acc = RF_evaluation_procedure(RF, X_train, Y_train, X_test, Y_test)
    X_valid, Y_valid = SFS_validate.read_features(i)
    sfs1 = pickle.load(open(filename, 'rb'))
    X_valid = sfs1.transform(X_valid)
    val_acc = SFS_validate.validation_procedure_RF(X_valid, Y_valid)
    print("val acc runFeatures_RF", val_acc)
    return str1, train_acc, val_acc
def do_sfs(x_tr, y_tr, n_features):
    # floating=True turns plain forward selection into SFFS: features can be
    # conditionally removed again after inclusion.
    sfs_kern = sfs(svm.SVC(kernel='rbf'),
                   k_features=n_features,
                   forward=True,
                   floating=True,
                   verbose=2,
                   scoring='accuracy',
                   cv=5)
    sfs_kern.fit(x_tr, y_tr)
    return sfs_kern
def feature_selection(self, X, y):
    lda = LinearDiscriminantAnalysis(solver='lsqr')
    X = self.pretreat(X)
    sfs1 = sfs(lda, k_features=self.max_steps, forward=self.forw,
               floating=self.flot, verbose=0, scoring=self.score, cv=self.cvl)
    sfs1 = sfs1.fit(X, y)
    return list(sfs1.k_feature_names_)
def selectFeatures(algorithm, X_train, y_train, numberOfFeatures, isForward):
    # A (min, max) tuple for k_features lets mlxtend return the best-scoring
    # subset of any size between 1 and numberOfFeatures.
    selector = sfs(algorithm,
                   k_features=(1, numberOfFeatures),
                   forward=isForward,
                   floating=False,
                   verbose=0,
                   scoring='accuracy',
                   cv=None,
                   n_jobs=-1)
    selector.fit(X_train.values, y_train)
    return list(selector.k_feature_idx_)
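# Hypothetical usage of selectFeatures; the iris data and the decision tree
# are assumptions chosen only to illustrate the (min, max) form of k_features.
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

iris = load_iris(as_frame=True)
best_idx = selectFeatures(DecisionTreeClassifier(random_state=0),
                          iris.data, iris.target,
                          numberOfFeatures=3, isForward=True)
print(best_idx)  # column indices of the best-scoring subset of size 1..3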
def getBestFeaturesForQDA(trainingData):
    x = trainingData.iloc[:, 0:11]
    y = trainingData.iloc[:, 11]
    bestFeatures = sfs(
        da.QuadraticDiscriminantAnalysis(),
        k_features="best",
        forward=False,
        floating=False,
        verbose=0,
        scoring='r2',
    ).fit(x, y)
    return bestFeatures.k_feature_names_, bestFeatures.k_feature_idx_
def getBestFeaturesForHigherOrderTerms(trainingData, num_features):
    x = trainingData.loc[:, trainingData.columns != 'label']
    y = trainingData.loc[:, 'label']
    bestFeatures = sfs(
        da.QuadraticDiscriminantAnalysis(),
        k_features=num_features,
        forward=True,
        floating=False,
        verbose=2,
        scoring='r2',
    ).fit(x, y)
    return bestFeatures.k_feature_names_
def getBestFeaturesForHigherOrderTerms(clf, trainingData, num_features, scoringString='r2'):
    x = trainingData.loc[:, trainingData.columns != 'label']
    y = trainingData.loc[:, 'label']
    bestFeatures = sfs(clf,
                       k_features=num_features,
                       forward=True,
                       floating=False,
                       verbose=2,
                       scoring=scoringString,
                       n_jobs=5).fit(x, y)
    return bestFeatures.k_feature_names_
def forward_step_feature_selection(x_train_1, y_train_1):
    # Build RF classifier to use in feature selection; a classifier is needed
    # to pair with the 'accuracy' scoring below.
    clf = RandomForestClassifier(n_estimators=100, n_jobs=-1)
    # Build step forward feature selection
    sfs1 = sfs(clf,
               k_features=10,
               forward=True,
               floating=False,
               verbose=2,
               scoring='accuracy',
               cv=5)
    # Perform SFS
    sfs1 = sfs1.fit(x_train_1, y_train_1)
    return sfs1
def fwrd_selection(scaled_X, Y):
    # Build LR classifier to use in feature selection
    clf = LogisticRegression()
    sfs1 = sfs(clf,
               k_features='best',
               forward=True,
               floating=False,
               verbose=0,
               scoring='accuracy',
               cv=5)
    sfs1 = sfs1.fit(scaled_X, Y)
    feat_cols = list(sfs1.k_feature_idx_)
    fs_vars = [scaled_X.columns[i] for i in feat_cols]
    return fs_vars
def feature_selection(self, X, y):
    mlr = LinearRegression()
    X = self.pretreat(X)
    sfs1 = sfs(mlr, k_features=self.max_steps, forward=self.forw,
               floating=self.flot, verbose=0, scoring=self.score, cv=self.cvl)
    sfs1 = sfs1.fit(X, y)
    return list(sfs1.k_feature_names_)
def wrapper_forward_selection(X, y, top_feat, model):
    model_forward = sfs(model,
                        k_features=top_feat,
                        forward=True,
                        floating=False,
                        verbose=0,
                        cv=5,
                        n_jobs=-1,
                        scoring='accuracy')
    model_forward.fit(X, y)
    # subsets_ maps subset size -> metrics dict; collect the feature-name tuple
    # for every intermediate subset, ordered by size.
    res = list(map(lambda e: e['feature_names'], model_forward.subsets_.values()))
    res.sort(key=len)
    return res
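# Hypothetical usage of wrapper_forward_selection; the dataset and classifier
# are assumptions, chosen to show that the result is a list of feature-name
# tuples of increasing length (one per intermediate subset).
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression

cancer = load_breast_cancer(as_frame=True)
X_df, y_sr = cancer.data.iloc[:, :5], cancer.target  # first 5 columns keep the demo fast

for names in wrapper_forward_selection(X_df, y_sr, top_feat=3,
                                       model=LogisticRegression(max_iter=1000)):
    print(len(names), names)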
def stepFeatureSelect(X, y, regressor, num_features=10, direction=False):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    X_train = pd.DataFrame(X_train, columns=list(X))
    X_test = pd.DataFrame(X_test, columns=list(X))
    # direction=True -> forward selection, False -> backward elimination
    stepF = sfs(regressor,
                k_features=num_features,
                forward=direction,
                floating=False,
                verbose=2,
                scoring='r2',
                cv=3,
                n_jobs=-1).fit(X_train, y_train)
    return FeatureSelector(stepF, X)
def select_features(model, X, y, n=10):
    """Input the number of features you want to keep."""
    candidate = []
    # Build step forward feature selection
    sfs1 = sfs(model,
               k_features=n,
               forward=True,
               floating=False,
               verbose=2,
               scoring='accuracy',
               cv=5)
    # Perform SFS
    sfs1 = sfs1.fit(X, y)
    # The index list of the important features
    feat_cols = list(sfs1.k_feature_idx_)
    for idx in feat_cols:
        candidate.append(X.columns[idx])
    return candidate
def WrapperAlgo(x_train, y_train):
    clsf = RandomForestClassifier(n_estimators=100, n_jobs=-1)
    # Build step forward feature selection
    sfs1 = sfs(
        clsf,
        k_features=18,
        forward=True,
        # The floating algorithms have an additional exclusion or inclusion step
        # to remove features once they were included (or excluded), so that a
        # larger number of feature subset combinations can be sampled.
        floating=False,
        verbose=2,
        scoring='accuracy',
        cv=5)
    # Perform SFS
    sfs1 = sfs1.fit(x_train, y_train)
    # Which features?
    feat_cols = list(sfs1.k_feature_idx_)
    return feat_cols
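# The comment in WrapperAlgo describes what floating selection adds; here is a
# minimal sketch of the same call with floating=True (SFFS). The synthetic data
# is an assumption for illustration.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as sfs

X_demo, y_demo = make_classification(n_samples=200, n_features=25, random_state=0)
sffs = sfs(RandomForestClassifier(n_estimators=100, n_jobs=-1),
           k_features=18, forward=True, floating=True,
           verbose=0, scoring='accuracy', cv=5)
sffs = sffs.fit(X_demo, y_demo)
print(list(sffs.k_feature_idx_))  # may differ from plain SFS when a swap helps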
def selectFeatures30(X, Y):
    """Select 30 features using step forward selection."""
    # Build RF classifier to use in feature selection
    clf = RandomForestClassifier(n_estimators=100, n_jobs=-1)
    # Build step forward feature selection
    sfs1 = sfs(clf,
               k_features=30,
               forward=True,
               floating=False,
               verbose=2,
               scoring='accuracy',
               cv=5)
    # Perform SFS
    sfs1 = sfs1.fit(X, Y)
    feat_cols = list(sfs1.k_feature_idx_)
    print(feat_cols)
    return sfs1
def run_sffs(X_train, X_test, y_train, y_test, clf, normalize, k_features, cv):
    if normalize == 'yes':
        X_train, X_test = normalize_features(X_train, X_test)
    print('Starting SFFS Dimensionality Reduction ..')
    start = time.time()
    sfs1 = sfs(clf,
               k_features=k_features,
               forward=True,
               floating=True,
               verbose=2,
               scoring='accuracy',
               cv=cv,
               n_jobs=-1)
    sfs1 = sfs1.fit(X_train, y_train)
    feat_cols = list(sfs1.k_feature_idx_)
    end = time.time()
    print('\nSFFS done in', end - start, 'seconds\n')
    print('Reduced dimension : ', len(feat_cols))
    return X_train[:, feat_cols], X_test[:, feat_cols]
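# normalize_features is not defined in the snippet above; a plausible minimal
# stand-in (an assumption, not the original helper) fits the scaler on the
# training split only and applies it to both splits.
from sklearn.preprocessing import StandardScaler

def normalize_features(X_train, X_test):
    scaler = StandardScaler().fit(X_train)
    return scaler.transform(X_train), scaler.transform(X_test)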
def forward_selection(cls, df, features_count=3):
    if df.name == 'train':
        qwk_scorer = make_scorer(cls.quadratic_weighted_kappa, greater_is_better=True)
        model = RandomForestClassifier(n_estimators=100, n_jobs=-1)
        X = df.drop('AdoptionSpeed', axis=1)
        y = df['AdoptionSpeed']
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            test_size=0.25,
                                                            random_state=42)
        y_train = y_train.ravel()
        y_test = y_test.ravel()
        # Use the features_count argument rather than a hard-coded subset size.
        sfs1 = sfs(model,
                   k_features=features_count,
                   forward=True,
                   floating=False,
                   verbose=2,
                   scoring=qwk_scorer,
                   cv=5)
        sfs1 = sfs1.fit(X_train, y_train)
        best_cols = list(sfs1.k_feature_idx_)
        return best_cols
# %%
# Train/test split (shuffle=False preserves temporal order for time-series CV)
X_train, X_test, y_train, y_test = train_test_split(df.values[:, :-1],
                                                    df.values[:, -1],
                                                    test_size=0.30,
                                                    random_state=42,
                                                    shuffle=False)
y_train = y_train.astype('int')
y_test = y_test.astype('int')

# %%
from sklearn.model_selection import TimeSeriesSplit as tscv  # assumed alias for the name used below

sfs1 = sfs(clf,
           k_features='best',
           scoring='accuracy',
           verbose=2,
           forward=True,
           cv=tscv(n_splits=5))
# Perform SFS
sfs1 = sfs1.fit(X_train, y_train)

# %%
print("Best accuracy from sfs:", sfs1.k_score_)
print("Indices selected by sfs:", sfs1.k_feature_idx_)
print("List of selected columns:", df.columns[list(sfs1.k_feature_idx_)])

# %%
# sfs lr acc
clf.fit(X_train[:, list(sfs1.k_feature_idx_)], y_train)
reg__model.summary()
predicted_values = reg__model.predict(x_test)

from sklearn.metrics import mean_squared_error
np.sqrt(mean_squared_error(y_test, predicted_values))  # RMSE
np.exp(predicted_values)  # exponentiate predictions (target presumably log-transformed)

import pandas as pd
dataset2.sort_values('income', ascending=False)

# Sequential (backward, floating) selection of 2 features
from mlxtend.feature_selection import SequentialFeatureSelector as sfs
from sklearn.linear_model import LinearRegression
model = sfs(LinearRegression(), k_features=2, forward=False, floating=True,
            n_jobs=-1, verbose=2, scoring='r2').fit(np.array(x_train), y_train)
model.k_feature_idx_

# Exhaustive selection: efs takes min_features/max_features and does not
# accept the forward/floating/verbose arguments of the sequential selector.
from mlxtend.feature_selection import ExhaustiveFeatureSelector as efs
model1 = efs(LinearRegression(), min_features=1, max_features=2,
             n_jobs=-1, scoring='r2').fit(np.array(x_train), y_train)
efs(LinearRegression(), min_features=1, max_features=3, n_jobs=-1, scoring='r2',
    print_progress=True, clone_estimator=True).fit(x_train, y_train)
"""**Building model with the best features and checking the R2 score for the same**""" mask = selector.support_ print(f"Best features according to RFE {X_m.columns[mask].values}") X_m1 = X_m.iloc[:,mask] # We could have used train test split or cross validation strategies # for scoring the model but in order to compare with the stats model # we will use the whole data model1 = LinearRegression().fit(X_m1,y_m) print(f"R2 Score: {model1.score(X_m1,y_m)}") """### Forward Selection""" model = LinearRegression(fit_intercept=False) sfs1 = sfs(model,k_features=20,forward=True,scoring='r2',cv=5) sfs1.fit(X_m,y_m) fig = plot_sfs(sfs1.get_metric_dict()) plt.title('Forward Selection') plt.grid() plt.show() print(sfs1.k_features, sfs1.k_feature_names_,sep="\n") index = list(sfs1.k_feature_idx_) X_m1 = X_m.iloc[:,index] model1 = LinearRegression().fit(X_m1,y_m) print(f"R2 Score: {model1.score(X_m1,y_m)}") """## Regularization 1. Lasso
# select a Series from the DataFrame
y = MFB_Data['2']
DT = MFB_Data.drop(['2'], axis=1)
X = DT[:]
# check the shape of y
y.shape

# In[21]:
# Build step forward feature selection
from mlxtend.feature_selection import SequentialFeatureSelector as sfs
sfs1 = sfs(clf,
           k_features=56,
           forward=True,
           floating=False,
           verbose=2,
           scoring='accuracy',
           cv=5)
sfs1 = sfs1.fit(X, y)  # the selector must be fitted before k_feature_idx_ is available

# In[99]:
# Which features?
feat_cols = list(sfs1.k_feature_idx_)
print(feat_cols)

# In[126]:
# check the type and shape of y
print(type(y))
print(y.shape)
y = my_data[0:20000, 0].astype(str)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
clf = RandomForestClassifier(n_estimators=10)
# clf = SVC(kernel='linear')
# try multiple scoring parameters, like 'accuracy', 'neg_mean_squared_error', None
sfs1 = sfs(clf,
           k_features=10,
           forward=True,
           floating=True,
           verbose=2,
           scoring='accuracy',
           cv=3,
           n_jobs=-1)
sfs1 = sfs1.fit(X_train, y_train)
feat_cols = list(sfs1.k_feature_idx_)
print(feat_cols)

# Build full model with selected features
clf.fit(X_train[:, feat_cols], y_train)
train_accuracy = clf.score(X_train[:, feat_cols], y_train)
test_accuracy = clf.score(X_test[:, feat_cols], y_test)
y_train_pred = clf.predict(X_train[:, feat_cols])
y_test_pred = clf.predict(X_test[:, feat_cols])
    return X_train, X_test

X_train, X_test = standardize(X_train, X_test)
X_train

"""<b>Inference :</b> The above table is produced by applying Standard Scaling to the train dataset, bringing all variables to a standardized format.

## Feature selection
"""

linreg = LinearRegression()
linreg_forward = sfs(estimator=linreg, k_features=100, forward=True, verbose=2, scoring='r2')
sfs_forward = linreg_forward.fit(X_train, y_train)

"""<b>Inference :</b> Building a forward feature selection. From 31 to 51 features the score is constant at 0.86, and it drops to 0.84 from 52 features onwards. This indicates that 51 significant features maximize model efficiency. Hence we rerun the model with 51 features using standard linear regression, as done below.
"""

linreg = LinearRegression()
linreg_forward = sfs(estimator=linreg,
                     k_features=51,  # 51 features, per the inference above
                     forward=True, verbose=2, scoring='r2')
def train():
    bankdata = pd.read_csv('trainingbin_.csv')
    X = bankdata.drop('class_label', axis=1)
    y = bankdata['class_label']

    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

    from mlxtend.feature_selection import SequentialFeatureSelector as sfs

    # Fit the transformer on the training split only and reuse it for the test split.
    scaler = QuantileTransformer(output_distribution='uniform')
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    clf = svm.SVC(kernel='linear', C=8192)
    # clf = RandomForestClassifier(n_estimators=100)

    # Fit the selector, then transform both splits with the learned subset
    # (fit_transform on the result of fit_transform was a bug).
    sfs1 = sfs(clf, k_features=10, forward=True, floating=False,
               verbose=2, scoring='accuracy')
    sfs1 = sfs1.fit(X_train, y_train)
    X_train_sel = sfs1.transform(X_train)
    X_test_sel = sfs1.transform(X_test)

    clf.fit(X_train_sel, y_train)
    y_train_pred = clf.predict(X_train_sel)

    from sklearn.metrics import accuracy_score as acc
    print('Training accuracy on selected features: %.3f' % acc(y_train, y_train_pred))
    y_test_pred = clf.predict(X_test_sel)
    print('Testing accuracy on selected features: %.3f' % acc(y_test, y_test_pred))

    from sklearn.metrics import confusion_matrix
    cnf_matrix = confusion_matrix(y_test, y_test_pred)
    print(cnf_matrix)

    # Per-class counts derived from the multi-class confusion matrix
    FP = (cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)).astype(float)
    FN = (cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)).astype(float)
    TP = np.diag(cnf_matrix).astype(float)
    TN = (cnf_matrix.sum() - (FP + FN + TP)).astype(float)

    # Sensitivity, hit rate, recall, or true positive rate
    TPR = TP / (TP + FN)
    # Specificity or true negative rate
    TNR = TN / (TN + FP)
    # Precision or positive predictive value
    PPV = TP / (TP + FP)
    # Negative predictive value
    NPV = TN / (TN + FN)
    # Fall-out or false positive rate
    FPR = FP / (FP + TN)
    # False negative rate
    FNR = FN / (TP + FN)
    # False discovery rate
    FDR = FP / (TP + FP)
    # Overall accuracy
    ACC = (TP + TN) / (TP + FP + FN + TN)

    # Macro-averages; the divisor assumes 55 classes in the label set
    print("FNR:", sum(FNR) / 55)
    print("FPR:", sum(FPR) / 55)
    print("ACC:", 100 * (sum(ACC) / 55))
import pandas as pd
import numpy as np
import plotly.graph_objs as go
from sklearn import linear_model
from sklearn.preprocessing import scale
from mlxtend.feature_selection import SequentialFeatureSelector as sfs

df = pd.read_csv('clean_data.csv', index_col=[0])
X = df.drop('lrfs', axis=1)
y = df['lrfs']

model = linear_model.LinearRegression()
sfs1 = sfs(model,
           k_features=(1, 16),
           forward=True,
           floating=True,
           verbose=2,
           scoring='r2')
sfs1 = sfs1.fit(X, y)

print("")
for i in sfs1.k_feature_idx_:
    print(X.columns[i])