import time

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# FCBF is assumed to come from a local FCBF module exposing an sklearn-style fit()/idx_sel API
from FCBF_module import FCBF

# Load the AVEC2017 depression training split, dropping incomplete rows
tmp = pd.read_csv(r'C:\Users\Yang\Documents\Tencent Files\1364131228\FileRecv\train_split_Depression_AVEC2017.csv')
tt = tmp.dropna()
y_train = tt["PHQ8_Binary"]
sex1 = tt['Gender']

# Load the test split the same way
tmp = pd.read_csv(r'C:\Users\Yang\Documents\Tencent Files\1364131228\FileRecv\full_test_split.csv')
tt = tmp.dropna()
sex2 = tt['Gender']
y_test = tt["PHQ_Binary"]

# Pre-extracted feature matrices for the two splits
x_test = pd.read_csv(r'C:\Users\Yang\test_target.csv', index_col=0)
x_train = pd.read_csv(r'C:\Users\Yang\train_target.csv', index_col=0)

x_train = np.array(x_train)
y_train = np.array(y_train)

# Run FCBF on the first 100 feature columns and keep the selected ones
ans = []
fcbf = FCBF()
fcbf.fit(x_train[:, 0:100], y_train)
n = x_train[:, fcbf.idx_sel]
print(fcbf.idx_sel)
ans.append(n)
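# Minimal sketch: the LogisticRegression / GridSearchCV imports above suggest the selected
# columns feed a tuned logistic regression; the grid values below are illustrative only, and
# x_test is assumed to share x_train's column layout.
x_test = np.array(x_test)
y_test = np.array(y_test)
param_grid = {'C': [0.01, 0.1, 1, 10]}  # illustrative grid, not taken from the source
grid = GridSearchCV(LogisticRegression(max_iter=1000), param_grid, cv=5, scoring='accuracy')
grid.fit(x_train[:, fcbf.idx_sel], y_train)
print("Best CV accuracy:", grid.best_score_)
print("Test accuracy:", grid.score(x_test[:, fcbf.idx_sel], y_test))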
import time

from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVR
from sklearn.ensemble import RandomForestRegressor
from skfeature.utility.mutual_information import su_calculation

# FCBF, FCBFK and get_i are assumed to come from the local FCBF module
from FCBF_module import FCBF, FCBFK, get_i

classifiers = [('SVR', LinearSVR(random_state=0, tol=1e-3)),
               ('RandomForestRegressor', RandomForestRegressor(n_estimators=1000, oob_score=True, n_jobs=-1))]

n_features = X_train_data.shape[1]
npieces = get_i(n_features)

""" FCBF """
fcbf = FCBF()
t0 = time.time()
fcbf.fit(X_train_data, y_train_data)  # y_train_data: assumed training label vector; fit() expects X and y
elapsed_t = time.time() - t0
k = len(fcbf.idx_sel)  # Number of selected features for FCBFK and FCBFiP

""" FCBF# """
fcbfk = FCBFK(k=k)
t0 = time.time()
fcbfk.fit(X_train_data, y_train_data)
elapsed_t = time.time() - t0
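# Minimal sketch: feed the FCBFK-selected columns to the two regressors declared above.
# y_train_data and y_test_data are assumed target vectors defined elsewhere in the script.
for tag, reg in classifiers:
    reg.fit(X_train_data[:, fcbfk.idx_sel], y_train_data)
    r2 = reg.score(X_test_data[:, fcbfk.idx_sel], y_test_data)  # R^2 on the held-out split
    print("{}: R^2 with {} FCBFK-selected features = {:.3f}".format(tag, k, r2))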
# Benchmarking loop: assumes `classifiers` holds (tag, estimator, param_grid) tuples and that
# `dataset`, `n_features`, `time`, `GridSearchCV` and `FCBF` are already defined/imported above.
npieces = get_i(n_features)

for tag, clf, param_grid in classifiers:

    """ No Feature Selection """
    grid = GridSearchCV(clf, param_grid, cv=10, scoring='accuracy')
    grid.fit(dataset.data, dataset.target)
    print("No Feature Selection")
    print("Classifier: {}".format(tag))
    print("Best score: {}\n".format(grid.best_score_))

    """ FCBF """
    fcbf = FCBF()
    t0 = time.time()
    fcbf.fit(dataset.data, dataset.target)
    elapsed_t = time.time() - t0

    """ Validation """
    grid = GridSearchCV(clf, param_grid, cv=10, scoring='accuracy')
    grid.fit(dataset.data[:, fcbf.idx_sel], dataset.target)
    print("FCBF")
    print("Classifier: {}".format(tag))
    print("Best score: {}".format(grid.best_score_))
    print("Elapsed Time: {}\n".format(elapsed_t))

    k = len(fcbf.idx_sel)  # Number of selected features for FCBFK and FCBFiP
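    # Minimal sketch continuing the loop body: validate the FCBFK selection with the same grid
    # search, mirroring the FCBF block above (FCBFK is assumed to come from the same FCBF module).
    """ FCBF# """
    fcbfk = FCBFK(k=k)
    t0 = time.time()
    fcbfk.fit(dataset.data, dataset.target)
    elapsed_t = time.time() - t0
    grid = GridSearchCV(clf, param_grid, cv=10, scoring='accuracy')
    grid.fit(dataset.data[:, fcbfk.idx_sel], dataset.target)
    print("FCBFK")
    print("Classifier: {}".format(tag))
    print("Best score: {}".format(grid.best_score_))
    print("Elapsed Time: {}\n".format(elapsed_t))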
import pickle
from time import time

from sklearn.naive_bayes import MultinomialNB

# FCBF is assumed to come from the same local FCBF module used above
from FCBF_module import FCBF

# Load the pickled splits from path_dataset_save
file = open(path_dataset_save + 'X_train_picked.pckl', 'rb')  # filename assumed by analogy with the other pickles
X_train_picked = pickle.load(file)
file.close()
file = open(path_dataset_save + 'X_test_picked.pckl', 'rb')
X_test_picked = pickle.load(file)
file.close()
file = open(path_dataset_save + 'y_train_picked.pckl', 'rb')
y_train_picked = pickle.load(file)
file.close()
file = open(path_dataset_save + 'y_test_picked.pckl', 'rb')
y_test_picked = pickle.load(file)
file.close()

X_train = X_train_picked
X_test = X_test_picked
y_train = y_train_picked
y_test = y_test_picked

# Select features with FCBF and time the fit
print('Computing embedding...')
fcbf = FCBF()
t0 = time()
fcbf.fit(X_train, y_train)
print('Done!')
print('Time: ', (time() - t0))

k = len(fcbf.idx_sel)  # Number of selected features for FCBF
print(fcbf.idx_sel)
print("Number of selected features for FCBF", k)

# Create a multinomial Naive Bayes classifier
model = MultinomialNB()
# Train the model on the FCBF-selected columns of the training set
model.fit(X_train[:, fcbf.idx_sel], y_train)
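# Minimal sketch: score the trained Naive Bayes model on the held-out split, applying the same
# FCBF-selected columns used for training.
from sklearn.metrics import accuracy_score

y_pred = model.predict(X_test[:, fcbf.idx_sel])
print("Test accuracy:", accuracy_score(y_test, y_pred))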