# Exemplo n.º 1 (Example no. 1 — scraped snippet separator)
# 0
from sklearn.linear_model import LogisticRegression
import time
from sklearn.model_selection import GridSearchCV

# --- Training split of the AVEC2017 depression data: drop incomplete rows,
# --- then pull the binary PHQ-8 label and the gender column.
tmp = pd.read_csv(
    r'C:\Users\Yang\Documents\Tencent Files\1364131228\FileRecv\train_split_Depression_AVEC2017.csv'
)
tt = tmp.dropna()
y_train, sex1 = tt["PHQ8_Binary"], tt['Gender']

# --- Test split: same cleanup; note the label column is named "PHQ_Binary"
# --- here (no "8"), unlike the training file.
tmp = pd.read_csv(
    r'C:\Users\Yang\Documents\Tencent Files\1364131228\FileRecv\full_test_split.csv'
)
tt = tmp.dropna()
sex2, y_test = tt['Gender'], tt["PHQ_Binary"]

# Pre-extracted feature matrices; first CSV column is the row index.
x_test = pd.read_csv(r'C:\Users\Yang\test_target.csv', index_col=0)
x_train = pd.read_csv(r'C:\Users\Yang\train_target.csv', index_col=0)

# FCBF below uses positional indexing, so work on plain arrays.
x_train, y_train = np.array(x_train), np.array(y_train)

ans = []

# Run FCBF feature selection on the first 100 feature columns and keep
# the columns it selects (idx_sel are positions within those 100).
fcbf = FCBF()
fcbf.fit(x_train[:, 0:100], y_train)
n = x_train[:, fcbf.idx_sel]
print(fcbf.idx_sel)
ans.append(n)
# Exemplo n.º 2 (Example no. 2 — scraped snippet separator)
# 0
from sklearn.ensemble import RandomForestRegressor

# Candidate regressors to evaluate after feature selection.
# NOTE(review): LinearSVR is not imported in this snippet -- it must come
# from an earlier `from sklearn.svm import LinearSVR` outside this view.
classifiers = [('SVR', LinearSVR(random_state=0, tol=1e-3)),
               ('RandomForestRegressor',
                RandomForestRegressor(n_estimators=1000,
                                      oob_score=True,
                                      n_jobs=-1))]

# Feature count of the training matrix; npieces presumably drives the
# FCBFiP partitioning -- get_i is defined outside this view, verify.
n_features = X_train_data.shape[1]
npieces = get_i(n_features)
"""
FCBF
"""
# Fit plain FCBF and time it.
# NOTE(review): fit(X, y) normally takes the label vector as its second
# argument; passing X_test_data here looks like a bug -- confirm whether
# a y_train vector was intended.
fcbf = FCBF()
t0 = time.time()
fcbf.fit(X_train_data, X_test_data)
elapsed_t = time.time() - t0

k = len(fcbf.idx_sel)  #Number of selected features for FCBFK and FCBFiP
"""
FCBF#
"""
# FCBFK keeps exactly k features, with k taken from the FCBF run above.
# Same suspicious second argument as the fit() call above.
fcbfk = FCBFK(k=k)
t0 = time.time()
fcbfk.fit(X_train_data, X_test_data)
elapsed_t = time.time() - t0  # NOTE: overwrites the FCBF timing captured above

from skfeature.utility.mutual_information import su_calculation
#
#
#def fcbf(X, y, **kwargs):
# Exemplo n.º 3 (Example no. 3 — scraped snippet separator)
# 0
# For each classifier, compare a 10-fold cross-validated grid search on all
# features against one restricted to the FCBF-selected features.
# NOTE(review): this snippet is truncated -- the final triple-quoted string
# is never closed in the visible text.
for tag, clf, param_grid in classifiers:
    """
    No Feature Selection
    """
    # Baseline: tune the classifier on the full feature set.
    grid = GridSearchCV(clf, param_grid, cv=10, scoring='accuracy')
    grid.fit(dataset.data, dataset.target)

    print("No Feature Selection")
    print("Classifer: {}".format(tag))
    print("Best score: {}\n".format(grid.best_score_))
    """
    FCBF
    """
    # Run FCBF feature selection on the full data and time it.
    fcbf = FCBF()
    t0 = time.time()
    fcbf.fit(dataset.data, dataset.target)
    elapsed_t = time.time() - t0
    """
    Validation 
    """
    # Re-run the same grid search using only the FCBF-selected columns.
    grid = GridSearchCV(clf, param_grid, cv=10, scoring='accuracy')
    grid.fit(dataset.data[:, fcbf.idx_sel], dataset.target)

    print("FCBF")
    print("Classifer: {}".format(tag))
    print("Best score: {}".format(grid.best_score_))
    print("Elapsed Time: {}\n".format(elapsed_t))

    k = len(fcbf.idx_sel)  #Number of selected features for FCBFK and FCBFiP
    """
    FCBF#
# Exemplo n.º 4 (Example no. 4 — scraped snippet separator)
# 0
file.close()  # closes a file opened before this snippet's visible range

# Load the pickled train/test label vectors saved under path_dataset_save.
# Context managers replace the manual open/close pairs so the handles are
# released even if pickle.load raises.
# NOTE(review): pickle.load executes arbitrary code from the file -- only
# load pickles produced by trusted code.
with open(path_dataset_save + 'y_train_picked.pckl', 'rb') as fh:
    y_train_picked = pickle.load(fh)
with open(path_dataset_save + 'y_test_picked.pckl', 'rb') as fh:
    y_test_picked = pickle.load(fh)

# Rebind to the conventional names used below.
X_train = X_train_picked
X_test = X_test_picked
y_train = y_train_picked
y_test = y_test_picked

print('Computing embedding...')
# FCBF feature selection on the training data, timed with time().
fcbf = FCBF()
t0 = time()
fcbf.fit(X_train, y_train)

print('Done!')
print('Time: ', (time() - t0))
k = len(fcbf.idx_sel)  # Number of selected features for FCBF
print(fcbf.idx_sel)
print("Number of selected features for FCBF", k)

# Create a multinomial naive Bayes classifier.
# (The original comment said "Gaussian"; MultinomialNB is multinomial NB.)
model = MultinomialNB()

# Train the model using only the FCBF-selected feature columns.
model.fit(X_train[:, fcbf.idx_sel], y_train)

# Predict on the same selected columns of the test set.
predicted = model.predict(X_test[:, fcbf.idx_sel])