def UF(X, y):
    """Evaluate univariate (chi2) feature selection for several values of k.

    For k = 50, 100 and 150 the k best-scoring features are kept with
    ``SelectKBest(chi2, k=k)``, the reduced matrix is re-split through
    ``pre_process(..., bReset=True)`` and the split is handed to
    ``SVM_recommend_run`` under the ``F_UF`` tag.  The original author
    describes this as the "forward" selection variant.

    Args:
        X: Feature matrix given to ``SelectKBest.fit_transform``.
        y: Target labels.
    """
    for multiplier in (1, 2, 3):
        k = 50 * multiplier
        reduced = SelectKBest(chi2, k=k).fit_transform(X, y)
        X_train, X_test, y_train, y_test = pre_process(
            reduced, y, bReset=True)
        SVM_recommend_run(F_UF, X_train, X_test, y_train, y_test,
                          paras={'k-best': k})
def SFM(X, y):
    """Evaluate model-based feature selection over a shrinking feature budget.

    ``SelectFromModel`` is fitted on the full ``X``/``y`` with ``max_features``
    set to 200, 150, 100 and 50 in turn.  Each reduced matrix is re-split via
    ``pre_process(..., bReset=True)`` and evaluated with ``SVM_recommend_run``
    under the ``B_SFM`` tag.  The original author describes this as a
    backward-style selection that gradually drops unimportant features.

    Args:
        X: Feature matrix (converted with ``np.asarray`` before fitting).
        y: Target labels (converted with ``np.asarray`` before fitting).
    """
    # NOTE(review): this first split is overwritten inside the loop before it
    # is ever read; the call is kept in case pre_process has side effects.
    X_train, X_test, y_train, y_test = pre_process(X, y)
    clf = SVM_recommend()
    for m in range(200, 0, -50):
        # threshold=-inf switches off the importance cut-off, so max_features
        # alone decides how many features survive.
        selector = SelectFromModel(clf, threshold=-np.inf, max_features=m)
        reduced = selector.fit_transform(np.asarray(X), np.asarray(y))
        X_train, X_test, y_train, y_test = pre_process(
            reduced, y, bReset=True)
        # The estimator used by the next round is whatever the run returns.
        # NOTE(review): confirm SVM_recommend_run returns an estimator.
        clf = SVM_recommend_run(B_SFM, X_train, X_test, y_train, y_test,
                                paras={'max-features': m})
def VT(X, y):
    """Evaluate variance-threshold feature selection over a range of thresholds.

    Thresholds 0.03, 0.06, ... (``0.03 * i`` for i in 1..49) are applied with
    ``VarianceThreshold``.  Each reduced matrix is re-split via
    ``pre_process(..., bReset=True)`` and evaluated with ``SVM_recommend_run``
    under the ``B_VT`` tag, recording both the threshold and the number of
    columns that survived.  The original author describes this as the
    "backward" variant.

    Args:
        X: Feature matrix given to ``VarianceThreshold.fit_transform``.
        y: Target labels.
    """
    # NOTE(review): VarianceThreshold raises ValueError once no feature
    # exceeds the threshold, which would abort the sweep - confirm the
    # variance range of the data covers the largest threshold.
    for step in range(1, 50):
        var = 0.03 * step
        reduced = VarianceThreshold(threshold=var).fit_transform(X)
        # The original author flagged the following split as problematic.
        X_train, X_test, y_train, y_test = pre_process(
            reduced, y, bReset=True)
        run_info = {
            'variance': var,
            'feature-num': reduced.shape[1],  # number of selected features
        }
        SVM_recommend_run(B_VT, X_train, X_test, y_train, y_test,
                          paras=run_info)
if feature not in good_features: selected_features = list(good_features) + [feature] Xts = np.column_stack(X[:, j] for j in selected_features) score = evaluateScore(Xts, y) scores.append((score, feature)) print("Current AUC : ", np.mean(score)) good_features.add(sorted(scores)[-1][1]) score_history.append(sorted(scores)[-1]) print("Current Features : ", sorted(list(good_features))) # Remove last added feature good_features.remove(score_history[-1][1]) good_features = sorted(list(good_features)) print("Selected Features : ", good_features) return good_features def transform(X, y): good_features = selectionLoop(X, y) return X[:, good_features] if __name__ == "__main__": os.chdir('..') X, y = load_data_small() print(X.shape) X_ = transform(X, y) X_train, X_test, y_train, y_test = pre_process(X_, y, bReset=True) SVM_recommend_run(AUC, X_train, X_test, y_train, y_test, {'feature-num': X_.shape[1]})
import sys
sys.path.append('..')  # must precede the project-local imports below
import os
import numpy as np
from lib.genetic import *
from configs import *
from load_data import load_data_small
from pre_process import pre_process
from baseline import SVM_recommend, SVM_recommend_run

if __name__ == "__main__":
    # Genetic-algorithm feature selection: for each feature budget
    # (200, 400, ..., 1800) build a fresh FeatureSelectionGA, let it generate
    # a column selection, then evaluate the reduced matrix under the GA tag.
    os.chdir('..')
    clf = SVM_recommend()
    X, y = load_data_small()
    for feature_num in range(200, 2000, 200):
        ga_selector = FeatureSelectionGA(clf, X, y, verbose=1)
        # The result is used directly as a column index into X.
        pop = ga_selector.generate(feature_num)
        X_ = X[:, pop]
        X_train, X_test, y_train, y_test = pre_process(
            X_, y, bReset=True)
        SVM_recommend_run(GA, X_train, X_test, y_train, y_test,
                          {'feature_num': feature_num})
# NOTE(review): this is the tail of a script section whose beginning (where
# t0, t1, X_, y, cm, NUM_COLORS, n_components, perplexity and random_state are
# set) lies outside this excerpt; it is laid out here as flat statements.
print("t-SNE: %.2g sec" % (t1 - t0))

# Min-max normalise every column of the embedding, for plotting only.
x_min = X_.min(0)
x_max = X_.max(0)
X_norm = (X_ - x_min) / (x_max - x_min)

plt.figure(figsize=(8, 8))
# One scatter point per sample, coloured by its label.  (An earlier variant,
# left commented out by the author, drew str(y[i]) as bold text instead.)
for idx in range(X_norm.shape[0]):
    point_color = cm(1. * y[idx] / NUM_COLORS)
    plt.scatter(X_norm[idx, 0], X_norm[idx, 1], color=point_color)
plt.xticks([])
plt.yticks([])

# File name is "<n_components> <perplexity> <random_state>.png".
name = " ".join(map(str, (n_components, perplexity, random_state))) + ".png"
plt.savefig(name)
plt.show()

# The un-normalised X_ (not X_norm) is what gets split and evaluated.
X_train, X_test, y_train, y_test = pre_process(X_, y, bReset=True)
run_info = {
    'n_cp': n_components,
    'ppl': perplexity,
    'rd': random_state,
}
SVM_recommend_run(tSNE, X_train, X_test, y_train, y_test, paras=run_info)
import sys
sys.path.append('..')  # must precede the project-local imports below
# The boruta package exposes its selector as ``BorutaPy``; the script
# previously imported ``Boruta`` while instantiating ``BorutaPy``.
from boruta import BorutaPy
import os
from configs import *
from baseline import SVM_recommend, SVM_recommend_run
from load_data import load_data
from pre_process import pre_process

if __name__ == "__main__":
    # Boruta feature selection: fit BorutaPy around the project estimator,
    # keep the supported columns and evaluate them under the BORUTA tag.
    os.chdir('..')
    clf = SVM_recommend()

    # X and y were never defined before use.
    # NOTE(review): assumes load_data() takes no arguments and returns (X, y)
    # like load_data_small() in the sibling scripts - confirm.
    X, y = load_data()

    # NOTE(review): BorutaPy expects an estimator exposing
    # feature_importances_ (and, with n_estimators='auto', tree-ensemble
    # parameters) - confirm SVM_recommend() is compatible.
    feat_selector = BorutaPy(clf, n_estimators='auto')
    feat_selector.fit(X, y)
    print(feat_selector.support_)

    selected = X[:, feat_selector.support_]
    print("")
    print("Selected Feature Matrix Shape")
    print(selected.shape)

    # Split the *selected* columns (previously the full X was split, so the
    # reported feature_num did not match the evaluated data).  bReset=True
    # mirrors every other selector script in this project.
    X_train, X_test, y_train, y_test = pre_process(
        selected, y, bReset=True)
    # Train on X_train (previously X_test was passed twice).
    SVM_recommend_run(BORUTA, X_train, X_test, y_train, y_test,
                      {'feature_num': selected.shape[1]})