def strategy05(X_train, labels_train, X_test, labels_test, groups):
    """Strategy number 5 for the second classifier.

    Pipeline fitted on the training set and replayed on the test set:
    clean -> full PCA (rotation only) -> normalization -> SFS(80, Fisher)
    -> PCA(10) concatenated with the 80 SFS features -> SFS(20, Fisher).
    Returns the result of `classifier_tests` on the transformed data.
    """
    print('\nEjecutando la estrategia número 5 del segundo clasificador...')

    # --- Fit every transformation on the training data ---
    keep = clean(X_train)
    Xtr = X_train[:, keep]
    # PCA keeping every component: a rotation, no dimensionality reduction.
    Xtr, _, rot1, mean1, _ = pca(Xtr, n_components=Xtr.shape[1])
    Xtr, scale, shift = normalize(Xtr)
    # First sequential forward selection: keep 80 features.
    sel80 = sfs(Xtr, labels_train, n_features=80, method="fisher")
    Xtr = Xtr[:, sel80]
    Xtr80 = Xtr.copy()
    # Compress the 80 features to 10 principal components ...
    Xtr, _, rot2, mean2, _ = pca(Xtr, n_components=10)
    # ... then pick 20 features out of the 10 PCs plus the 80 originals.
    Xtr = np.concatenate((Xtr, Xtr80), axis=1)
    sel20 = sfs(Xtr, labels_train, n_features=20, method="fisher")
    Xtr = Xtr[:, sel20]

    # --- Replay the same transformations, in the same order, on the test data ---
    Xte = X_test[:, keep]
    Xte = np.matmul(Xte - mean1, rot1)
    Xte = Xte * scale + shift
    Xte = Xte[:, sel80]
    Xte80 = Xte.copy()
    Xte = np.matmul(Xte - mean2, rot2)
    Xte = np.concatenate((Xte, Xte80), axis=1)
    Xte = Xte[:, sel20]

    # Train on the training split, evaluate on the testing split.
    return classifier_tests(Xtr, labels_train, Xte, labels_test, groups)
def sequential_forward_selection(x_training, y_training, n_features, method):
    """Wrapper around `sfs` that selects `n_features` features from the
    training data with the given criterion, with progress display off.

    Returns the indices of the selected features.
    """
    return sfs(x_training, y_training, n_features=n_features,
               method=method, show=False)
def strategy02(X_train, labels_train, X_test, labels_test):
    """Strategy number 2 for the first classifier.

    Pipeline fitted on the training set and replayed on the test set:
    clean -> PCA(70) -> normalization -> SFS(20, Fisher).
    Returns the result of `classifier_tests` on the transformed data.
    """
    print('\nEjecutando la estrategia número 2 del primer clasificador...')

    # --- Fit every transformation on the training data ---
    keep = clean(X_train)
    Xtr = X_train[:, keep]
    # Reduce to 70 principal components.
    Xtr, _, rot, mean, _ = pca(Xtr, n_components=70)
    Xtr, scale, shift = normalize(Xtr)
    # Keep the 20 best features by the Fisher criterion.
    sel = sfs(Xtr, labels_train, n_features=20, method="fisher")
    Xtr = Xtr[:, sel]

    # --- Replay the same transformations on the test data ---
    Xte = X_test[:, keep]
    Xte = np.matmul(Xte - mean, rot)
    Xte = Xte * scale + shift
    Xte = Xte[:, sel]

    return classifier_tests(Xtr, labels_train, Xte, labels_test)
def strategy01(X_train, labels_train, X_test, labels_test):
    """Strategy number 1 for the first classifier.

    Pipeline fitted on the training set and replayed on the test set:
    clean -> mean-std normalization -> SFS(50, Fisher) -> PCA(50).
    Returns the result of `classifier_tests` on the transformed data.
    """
    print('\nEjecutando la estrategia número 1 del primer clasificador...')

    # --- Fit every transformation on the training data ---
    keep = clean(X_train)
    Xtr = X_train[:, keep]
    Xtr, scale, shift = normalize(Xtr)
    # Keep the 50 best features by the Fisher criterion.
    sel = sfs(Xtr, labels_train, n_features=50, method="fisher")
    Xtr = Xtr[:, sel]
    # Rotate with PCA, keeping all 50 components.
    Xtr, _, rot, mean, _ = pca(Xtr, n_components=50)

    # --- Replay the same transformations, in the same order, on the test data ---
    Xte = X_test[:, keep]
    Xte = Xte * scale + shift
    Xte = Xte[:, sel]
    Xte = np.matmul(Xte - mean, rot)

    return classifier_tests(Xtr, labels_train, Xte, labels_test)
def strategy04(X_train, labels_train, X_test, labels_test):
    """Strategy number 4 for the first classifier.

    Pipeline fitted on the training set and replayed on the test set:
    clean -> mean-std normalization -> SFS(26, Fisher).
    Returns the result of `classifier_tests` on the transformed data.
    """
    print('\nEjecutando la estrategia número 4 del primer clasificador...')

    # --- Fit every transformation on the training data ---
    keep = clean(X_train)
    Xtr = X_train[:, keep]
    Xtr, scale, shift = normalize(Xtr)
    # Keep the 26 best features by the Fisher criterion.
    sel = sfs(Xtr, labels_train, n_features=26, method="fisher")
    Xtr = Xtr[:, sel]

    # --- Replay the same transformations on the test data ---
    Xte = X_test[:, keep]
    Xte = Xte * scale + shift
    Xte = Xte[:, sel]

    return classifier_tests(Xtr, labels_train, Xte, labels_test)
def strategy01(X_train, labels_train, X_test, labels_test, groups):
    """Strategy number 1 for the second classifier.

    Pipeline fitted on the training set and replayed on the test set:
    clean -> mean-std normalization -> SFS(50, Fisher).
    Returns the result of `classifier_tests` on the transformed data.
    """
    print('\nEjecutando la estrategia número 1 del segundo clasificador...')

    # --- Fit every transformation on the training data ---
    keep = clean(X_train)
    Xtr = X_train[:, keep]
    Xtr, scale, shift = normalize(Xtr)
    # Keep the 50 best features by the Fisher criterion.
    sel = sfs(Xtr, labels_train, n_features=50, method="fisher")
    Xtr = Xtr[:, sel]

    # --- Replay the same transformations on the test data ---
    Xte = X_test[:, keep]
    Xte = Xte * scale + shift
    Xte = Xte[:, sel]

    return classifier_tests(Xtr, labels_train, Xte, labels_test, groups)
def strategy03(X_train, labels_train, X_test, labels_test, groups):
    """Strategy number 3 for the second classifier.

    Pipeline fitted on the training set and replayed on the test set:
    clean -> normalization -> SFS(80, Fisher) -> PCA(20) -> SFS(15, Fisher).
    Returns the result of `classifier_tests` on the transformed data.
    """
    print('\nEjecutando la estrategia número 3 del segundo clasificador...')

    # --- Fit every transformation on the training data ---
    keep = clean(X_train)
    Xtr = X_train[:, keep]
    Xtr, scale, shift = normalize(Xtr)
    # First selection: keep 80 features by the Fisher criterion.
    sel80 = sfs(Xtr, labels_train, n_features=80, method="fisher")
    Xtr = Xtr[:, sel80]
    # Compress to 20 principal components.
    Xtr, _, rot, mean, _ = pca(Xtr, n_components=20)
    # Second selection: keep the 15 best components.
    sel15 = sfs(Xtr, labels_train, n_features=15, method="fisher")
    Xtr = Xtr[:, sel15]

    # --- Replay the same transformations, in the same order, on the test data ---
    Xte = X_test[:, keep]
    Xte = Xte * scale + shift
    Xte = Xte[:, sel80]
    Xte = np.matmul(Xte - mean, rot)
    Xte = Xte[:, sel15]

    # Train on the training split, evaluate on the testing split.
    return classifier_tests(Xtr, labels_train, Xte, labels_test, groups)
def WinnerStrategy(X_train, labels_train, X_test, labels_test, groups): ''' Estrategia Número 1 con redes neuronales, Reescrita para poder obtener estadísticas ''' # Paso 3: Cleaning de los datos # > Training: 8000 x 250 s_clean = clean(X_train) X_train = X_train[:, s_clean] # Paso 4: Normalización Mean-Std de los datos X_train, a, b = normalize(X_train) # Paso 5: Selección de características # Acá se utilizó el criterio de fisher # > Training: 8000 x 50 s_sfs = sfs(X_train, labels_train, n_features=50, method="fisher") X_train = X_train[:, s_sfs] # *** DEFINCION DE DATOS PARA EL TESTING *** X_test = X_test[:, s_clean] # Paso 3: clean X_test = X_test * a + b # Paso 4: normalizacion X_test = X_test[:, s_sfs] # Paso 5: SFS classifier = MLPClassifier(alpha=1, max_iter=1000, random_state=2) results = {} Y_pred = np.array([]) labels_test = np.array([]) # Probamos con los valores de testing classifier.fit(X_train, labels_train) for sample in groups['test']: patch_data = np.array([]) for patch in groups['test'][sample]: features = X_test[groups['test'][sample][patch], :].reshape(1, -1) patch_data = np.append(patch_data, classifier.predict(features)[0]) # Clase clasificada Y_pred = np.append(Y_pred, stats.mode(patch_data)[0][0]) labels_test = np.append(labels_test, get_class_by_name(sample)) results['Accuracy'] = performance(Y_pred, labels_test) * 100 results['Y_pred'] = Y_pred results['labels_test'] = labels_test return results
def WinnerStrategy(X_train, labels_train, X_test, labels_test):
    """Strategy number 1 with neural networks, rewritten so that the raw
    predictions can be returned for later statistics.

    Pipeline fitted on the training set and replayed on the test set:
    clean -> mean-std normalization -> SFS(50, Fisher) -> PCA(50),
    followed by an MLP classifier.

    Returns a dict with keys 'Accuracy' (percentage), 'Y_pred' and
    'labels_test'.
    """
    # --- Fit every transformation on the training data ---
    keep = clean(X_train)
    Xtr = X_train[:, keep]
    Xtr, scale, shift = normalize(Xtr)
    # Keep the 50 best features by the Fisher criterion.
    sel = sfs(Xtr, labels_train, n_features=50, method="fisher")
    Xtr = Xtr[:, sel]
    # Rotate with PCA, keeping all 50 components.
    Xtr, _, rot, mean, _ = pca(Xtr, n_components=50)

    # --- Replay the same transformations, in the same order, on the test data ---
    Xte = X_test[:, keep]
    Xte = Xte * scale + shift
    Xte = Xte[:, sel]
    Xte = np.matmul(Xte - mean, rot)

    # Train the neural network and classify the testing samples.
    net = MLPClassifier(alpha=1, max_iter=1000, random_state=2)
    net.fit(Xtr, labels_train)
    predictions = net.predict(Xte)

    return {
        'Accuracy': performance(predictions, labels_test) * 100,
        'Y_pred': predictions,
        'labels_test': labels_test,
    }
def SFS(x_train, y_train, x_test, x_val, n_features, method="fisher", show=False):
    """Select `n_features` features sequentially on the training set and
    apply the same column selection to the test and validation sets.

    Parameters
    ----------
    x_train, y_train : training feature matrix and labels used to fit the
        selection.
    x_test, x_val : additional feature matrices reduced to the same columns.
    n_features : number of features to keep.
    method : selection criterion forwarded to `sfs` (e.g. "fisher").
    show : whether to display the selection progress.

    Returns
    -------
    tuple
        (x_train, x_test, x_val) restricted to the selected columns.
    """
    # BUG FIX: `method` was previously hard-coded to "fisher" inside the
    # call, silently ignoring the caller's argument; forward it instead.
    s_sfs = sfs(x_train, y_train, n_features=n_features, method=method, show=show)
    x_train = x_train[:, s_sfs]
    x_test = x_test[:, s_sfs]
    x_val = x_val[:, s_sfs]
    return x_train, x_test, x_val
shuffle=False) # *** DEFINCION DE DATOS PARA EL TRAINING *** # Paso 2-Training: Clean # > Training: 211 x 387 s_clean = clean(X_train) X_train = X_train[:, s_clean] # Paso 3-Training: Normalizacion # > Training: 211 x 387 X_train, a, b = normalize(X_train) # Paso 4-Training: SFS # > Training: 211 x 40 s_sfs = sfs(X_train, d_train, n_features=40, method="fisher", show=True) X_train = X_train[:, s_sfs] # Paso 5-Training: PCA # > Training: 211 x 10 X_train, _, A, Xm, _ = pca(X_train, n_components=10) # *** DEFINCION DE DATOS PARA EL TESTING *** X_test = X_test[:, s_clean] # Paso 2: clean X_test = X_test * a + b # Paso 3: normalizacion X_test = X_test[:, s_sfs] # Paso 4: SFS X_test = np.matmul(X_test - Xm, A) # Paso 5: PCA # *** ENTRENAMIENTO CON DATOS DE TRAINING Y PRUEBA CON DATOS DE TESTING ***
# Training and Testing data (90% training, 10% testing) idx_train, idx_test = stratify(classes, .90) f_train = features[idx_train] c_train = classes[idx_train] f_test = features[idx_test] c_test = classes[idx_test] f_train_norm, a, b = normalize(f_train) f_test_norm = f_test * a + b # %% from pybalu.feature_selection import sfs N_FEATURES = 15 selected_feats = sfs(f_train_norm, c_train, n_features=N_FEATURES, method="fisher", show=True) # %% from pybalu.classification import structure from pybalu.performance_eval import performance from sklearn.neighbors import KNeighborsClassifier import matplotlib.pyplot as plt def performance_for_features(feat_idxs): # train classifier knn = KNeighborsClassifier(n_neighbors=3) knn.fit(f_train_norm[:, feat_idxs], c_train) # predict and evaluate performance prediction = knn.predict(f_test_norm[:, feat_idxs])
from sklearn.neighbors import KNeighborsClassifier # Modelo KNN from pybalu.feature_selection import sfs # Selección de features. from pybalu.performance_eval import performance # Cómputo del accuracy. # Leemos los datos. mat1 = loadmat('DATOS1.mat') mat2 = loadmat('DATOS2.mat') # Cantidad de features. N_FEATURES = 15 # Obtenemos X,Y X, Y = mat2["X"], mat2["Y"].squeeze() # Selección de features selected_feats = sfs(X, Y, n_features=N_FEATURES, method="fisher", show=False) # Separamos los datos de entrenamiento y testing. Xtrain, Ytrain = mat1["Xtrain"], mat1["Ytrain"].squeeze() Xtest, Ytest = mat1["Xtest"], mat1["Ytest"].squeeze() # Se definen Xtrain_new,Xtest_new con las features seleccionadas. Xtrain_new, Xtest_new = Xtrain[:, selected_feats], Xtest[:, selected_feats] # Se define el modelo y se entrena. knn = KNeighborsClassifier(n_neighbors=1) knn.fit(Xtrain_new, Ytrain) # Se obtiene la predicción Ypred_new = knn.predict(Xtest_new)
print('Original extracted features: ' + str(X_train.shape[1]) + '(' + str(X_train.shape[0]) + ' samples)') # Training: Cleaning sclean = clean(X_train, show=True) X_train_clean = X_train[:, sclean] print(' cleaned features: ' + str(X_train_clean.shape[1]) + '(' + str(X_train_clean.shape[0]) + ' samples)') # Training: Normalization X_train_norm, a, b = normalize(X_train_clean) print(' normalized features: ' + str(X_train_norm.shape[1]) + '(' + str(X_train_norm.shape[0]) + ' samples)') # Training: Feature selection ssfs = sfs(X_train_norm, d_train, n_features=20, method="fisher", show=True) X_train_sfs = X_train_norm[:, ssfs] print(' selected features: ' + str(X_train_sfs.shape[1]) + '(' + str(X_train_sfs.shape[0]) + ' samples)') # Testing dataset print('Testing Subset:') X_test = np.concatenate((X0_test, X1_test), axis=0) d0_test = np.zeros([X0_test.shape[0], 1], dtype=int) d1_test = np.ones([X1_test.shape[0], 1], dtype=int) d_test = np.concatenate((d0_test, d1_test), axis=0) # Testing: Cleaning X_test_clean = X_test[:, sclean] # Testing: Normalization
# Determine which feature columns survive cleaning.
p_clean = clean(Xtrain)
after = len(p_clean)
print(f"Cleaned.\nBefore:{before} features\nNow: {after} features")

# After finding which features to use, restrict both splits to them.
# NOTE(review): if Xtrain/Xtest are numpy arrays, `Xtrain[:, p_clean]`
# would do this in one vectorized step — confirm the types before changing.
Xtrain_cleaned = np.array([[x[i] for i in p_clean] for x in Xtrain])
Xtest_cleaned = np.array([[x[i] for i in p_clean] for x in Xtest])

# A bug in the sfs code does not allow class labels to skip integers
# (1,3,5,...). Remap them to consecutive integers only for running sfs.
Ytrain_sfs = np.array([int((y - 1) / 2) for y in Ytrain])

# Obtain the features that sfs selects for us (100 features).
p_sfs = sfs(Xtrain_cleaned, Ytrain_sfs, 100, show=True)

# Again restrict both splits, now to the features given by the selector.
Xtrain_sfs = np.array([[x[i] for i in p_sfs] for x in Xtrain_cleaned])
Xtest_sfs = np.array([[x[i] for i in p_sfs] for x in Xtest_cleaned])

# Initialise the classifier with k=1 and fit on the original labels.
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(Xtrain_sfs, Ytrain)

# 3) Compute the accuracy using pybalu's `performance` function:
# predict on the test split and report the result.
pred = knn.predict(Xtest_sfs)
print(f"Predicción tuvo un accuracy de {performance(pred, Ytest)}")