from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector

# Select 10 features by forward sequential feature selection,
# using a random forest as the wrapped estimator.
sfs = SequentialFeatureSelector(RandomForestClassifier(n_estimators=10, n_jobs=-1),
                                k_features=10,
                                forward=True,
                                floating=False,
                                verbose=2,
                                scoring='accuracy',
                                cv=3)
X = sfs.fit_transform(X, y)

# Alternative: the same selection wrapped around a decision tree.
# from sklearn.tree import DecisionTreeClassifier
# sfs = SequentialFeatureSelector(DecisionTreeClassifier(), k_features=10,
#                                 forward=True, floating=False, verbose=2,
#                                 scoring='accuracy', cv=3)
# X = sfs.fit_transform(X, y)

#####################################################################
# Split into x_train, y_train to train the machine learning model.
from sklearn.model_selection import train_test_split
# The original call was truncated after "X,"; the split parameters
# below are assumed values.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=0)
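# ---------------------------------------------------------------
# A minimal follow-up sketch (not part of the original snippet):
# after fitting, mlxtend's SequentialFeatureSelector exposes the chosen
# subset via its documented attributes, which is useful for checking
# what the greedy search actually kept.
print("Selected feature indices:", sfs.k_feature_idx_)
print("CV accuracy of the selected subset: %.3f" % sfs.k_score_)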
from mlxtend.feature_selection import SequentialFeatureSelector


def sequential(X, y, *, estimator, direction='forward', n_features=10, cv=0):
    """Sequential feature selection.

    Sequential feature selection algorithms are a family of greedy search
    algorithms that are used to reduce an initial d-dimensional feature
    space to a k-dimensional feature subspace where k < d. These algorithms
    remove or add one feature at a time based on the classifier performance
    until a feature subset of the desired size k is reached.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features_pre)
        Feature matrix.

    y : labels, ndarray of shape (n_samples,)
        Response variables.

    estimator : object
        Classifier whose cross-validated score guides the search.

    direction : string, default='forward'
        Direction of sequential model, can be 'forward' or 'backward'.

    n_features : int, default=10
        Number of features to select.

    cv : int, default=0
        Number of cross-validation steps.

    Returns
    -------
    arr : ndarray of shape (n_samples, n_features)
        Array containing features selected by the sequential models.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.ensemble import RandomForestClassifier
    >>> from protlearn.features import aac, aaindex1, ngram
    >>> from protlearn.dimreduction import sequential
    >>> seqs = ['ARKLY', 'EERKPGL', 'PGPGEERNLY']
    >>> labels = [1., 0., 0.]
    >>> comp, _ = aac(seqs)
    >>> aaind, _ = aaindex1(seqs)
    >>> ng, _ = ngram(seqs)
    >>> features = np.concatenate([comp, aaind, ng], axis=1)
    >>> features.shape
    (3, 575)
    >>> rf = RandomForestClassifier()
    >>> reduced = sequential(features, labels, estimator=rf, n_features=10)
    >>> reduced.shape
    (3, 10)
    """
    if direction == 'forward':
        method = True
    elif direction == 'backward':
        method = False
    else:
        raise ValueError("direction must be 'forward' or 'backward'")

    mdl = SequentialFeatureSelector(estimator,
                                    k_features=n_features,
                                    forward=method,
                                    floating=False,
                                    verbose=0,
                                    scoring='accuracy',
                                    cv=cv)
    arr = mdl.fit_transform(X, y)

    return arr
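# ---------------------------------------------------------------
# A minimal usage sketch of sequential() that needs only scikit-learn:
# the synthetic data from make_classification stands in for the
# protlearn features used in the docstring example above.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X_demo, y_demo = make_classification(n_samples=100, n_features=50,
                                     n_informative=10, random_state=0)
rf = RandomForestClassifier(n_estimators=50, random_state=0)
reduced = sequential(X_demo, y_demo, estimator=rf, n_features=10, cv=3)
print(reduced.shape)  # (100, 10)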
import numpy as np


def applyFeatureSelection(X, y, algorithm, n_components, mode, merged=False):
    # Flatten the leading sample/spatial axes so that the last axis
    # becomes the feature axis expected by the selectors.
    if merged:
        newX = np.reshape(X, (-1, X.shape[3]))
        newY = np.reshape(y, y.shape[0] * y.shape[1] * y.shape[2])
    else:
        newX = np.reshape(X, (-1, X.shape[2]))
        newY = np.reshape(y, y.shape[0] * y.shape[1])

    feature = None
    # Different ways to select features with wrapper methods:
    # https://stackabuse.com/applying-wrapper-methods-in-python-for-feature-selection/
    # RFE vs. SFS:
    # https://stackoverflow.com/questions/35640168/wrapper-methods-for-feature-selection-machine-learning-in-scikit-learn
    if mode == "forward":
        # TODO: takes forever, maybe change params like n_jobs
        from mlxtend.feature_selection import SequentialFeatureSelector
        feature = SequentialFeatureSelector(algorithm, k_features=n_components,
                                            forward=True)
    elif mode == "backward_threshold" or mode == "mixed":
        from sklearn.feature_selection import SelectFromModel
        feature = SelectFromModel(algorithm, max_features=n_components)
    elif mode == "backward_iterate":
        # Legacy option, way slower than with a threshold.
        from sklearn.feature_selection import RFE
        feature = RFE(algorithm, n_features_to_select=n_components)
    else:
        raise ValueError("Unknown feature selection mode " + mode)

    new_X = feature.fit_transform(newX, newY)

    if mode == "mixed":
        # SelectFromModel may return fewer than n_components features,
        # so top the subset up with RFE over the features it left out.
        formerX = newX.transpose()
        new_X = new_X.transpose()
        # Remove features already present in the selected subset.
        unused_X = []
        for i in range(len(formerX)):
            newXrow = formerX[i]
            found = False
            for j in range(len(new_X)):
                checkXrow = new_X[j]
                if (newXrow == checkXrow).all():
                    found = True
                    break
            if not found:
                unused_X.append(newXrow)
        # Add the missing features.
        from sklearn.feature_selection import RFE
        # n_features_to_select means that this many features will REMAIN
        # afterwards, not that they are selected for removal.
        feature = RFE(algorithm,
                      n_features_to_select=n_components - new_X.shape[0])
        unused_X = np.array(unused_X).transpose()
        add_me = feature.fit_transform(unused_X, newY)
        # Merge the two subsets.
        new_X = np.concatenate((new_X.transpose(), add_me), axis=1)

    # Restore the original leading axes, with the reduced feature axis last.
    if merged:
        new_X = np.reshape(new_X, (X.shape[0], X.shape[1], X.shape[2], -1))
    else:
        new_X = np.reshape(new_X, (X.shape[0], X.shape[1], -1))
    return new_X, feature
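# ---------------------------------------------------------------
# A minimal usage sketch for applyFeatureSelection (the array shapes
# and the RandomForestClassifier below are illustrative assumptions,
# not part of the original function): a (samples, timesteps, features)
# cube is flattened internally, reduced to 5 features, and reshaped back.
from sklearn.ensemble import RandomForestClassifier

rng = np.random.default_rng(0)
X_cube = rng.normal(size=(20, 10, 30))        # 30 input features
y_cube = rng.integers(0, 2, size=(20, 10))    # one label per timestep
X_red, selector = applyFeatureSelection(
    X_cube, y_cube, RandomForestClassifier(n_estimators=20),
    n_components=5, mode="backward_iterate")
print(X_red.shape)  # (20, 10, 5)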
import pandas as pd
from sklearn.datasets import load_iris

# The column names below identify the dataset as iris.
data = load_iris()
new_data = pd.DataFrame(data["data"], columns=data["feature_names"])
print(new_data)

target = data.target
print(target)

new_data = pd.concat(
    [new_data, pd.DataFrame(target, columns=["target"])], axis=1)
print(new_data)

new_data.columns = [
    "sepal_length", "sepal_width", "petal_length", "petal_width", "target"
]
print(new_data)

X = new_data.drop(columns=["target"])
y = new_data["target"]

from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.linear_model import LinearRegression

# For backward selection (forward=False; mlxtend's SFS defaults to forward).
sbs = SFS(LinearRegression(), k_features=2, forward=False)
backward_selection = sbs.fit(X, y)
print(backward_selection.k_feature_names_)

# For forward selection.
sfs = SFS(LinearRegression(), k_features=2, forward=True)
forward_selection = sfs.fit_transform(X, y)
print(sfs.k_feature_names_)
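# ---------------------------------------------------------------
# A short follow-up sketch: the floating=False parameter seen in the
# snippets above can instead be set to True, giving the floating
# variants (SFFS/SBFS) that may re-add or re-remove features after each
# step and often find better subsets at extra cost. This block reuses
# X and y from above; the scoring and cv values are assumed.
sffs = SFS(LinearRegression(), k_features=2, forward=True, floating=True,
           scoring='r2', cv=3)
sffs = sffs.fit(X, y)
print(sffs.k_feature_names_)   # names of the 2 selected columns
print(sffs.k_score_)           # mean cross-validated R^2 of that subset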