def strategy01(X_train, labels_train, X_test, labels_test):
    ''' Strategy #1 for the first classifier '''
    print('\nRunning strategy #1 of the first classifier...')

    # *** TRAINING DATA DEFINITION ***
    # Step 1: Clean the data
    # > Training: 5040 x 82
    s_clean = clean(X_train)
    X_train = X_train[:, s_clean]

    # Step 2: Mean-std normalization of the data
    X_train, a, b = normalize(X_train)

    # Step 3: Feature selection
    # The Fisher criterion was used here
    # > Training: 5040 x 50
    s_sfs = sfs(X_train, labels_train, n_features=50, method="fisher")
    X_train = X_train[:, s_sfs]

    # Step 4: PCA
    # > Training: 5040 x 50
    X_train, _, A, Xm, _ = pca(X_train, n_components=50)

    # *** TESTING DATA DEFINITION ***
    X_test = X_test[:, s_clean]           # Step 1: Clean
    X_test = X_test * a + b               # Step 2: Normalization
    X_test = X_test[:, s_sfs]             # Step 3: SFS
    X_test = np.matmul(X_test - Xm, A)    # Step 4: PCA

    return classifier_tests(X_train, labels_train, X_test, labels_test)
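# Note that the test set is transformed with the *training* parameters (a, b)
# rather than re-normalized on its own. A minimal numpy sketch of the mean-std
# contract that the normalize() calls above appear to follow (Xn = X * a + b,
# with a = 1/std and b = -mean/std); the helper name is hypothetical:
def _normalize_sketch(X):
    import numpy as np
    a = 1.0 / np.std(X, axis=0)      # per-feature scale
    b = -np.mean(X, axis=0) * a      # per-feature offset
    return X * a + b, a, b
# Held-out data is then mapped with the same (a, b), i.e. X_test * a + b,
# exactly as done in the testing blocks of each strategy.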
def strategy02(X_train, labels_train, X_test, labels_test):
    ''' Strategy #2 for the first classifier '''
    print('\nRunning strategy #2 of the first classifier...')

    # *** TRAINING DATA DEFINITION ***
    # Step 1: Clean
    # > Training: 5040 x 82
    s_clean = clean(X_train)
    X_train = X_train[:, s_clean]

    # Step 2: PCA with 70 components
    # > Training: 5040 x 70
    X_train, _, A, Xm, _ = pca(X_train, n_components=70)

    # Step 3: Normalization
    # > Training: 5040 x 70
    X_train, a, b = normalize(X_train)

    # Step 4: SFS
    # > Training: 5040 x 20
    s_sfs = sfs(X_train, labels_train, n_features=20, method="fisher")
    X_train = X_train[:, s_sfs]

    # *** TESTING DATA DEFINITION ***
    X_test = X_test[:, s_clean]           # Step 1: Clean
    X_test = np.matmul(X_test - Xm, A)    # Step 2: PCA
    X_test = X_test * a + b               # Step 3: Normalization
    X_test = X_test[:, s_sfs]             # Step 4: SFS

    return classifier_tests(X_train, labels_train, X_test, labels_test)
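# The pca() calls return, among other things, the component matrix A and the
# training mean Xm; new samples are then projected as (X - Xm) @ A. A minimal
# numpy sketch of that projection contract (the function name is hypothetical,
# assuming the columns of A are the principal directions):
def _pca_fit_sketch(X, n_components):
    import numpy as np
    Xm = X.mean(axis=0)                      # training mean, reused at test time
    _, _, Vt = np.linalg.svd(X - Xm, full_matrices=False)
    A = Vt[:n_components].T                  # columns = principal directions
    return (X - Xm) @ A, A, Xm
# Test-time projection then mirrors the code above: np.matmul(X_test - Xm, A).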
def strategy04(X_train, labels_train, X_test, labels_test):
    ''' Strategy #4 for the first classifier '''
    print('\nRunning strategy #4 of the first classifier...')

    # *** TRAINING DATA DEFINITION ***
    # Step 1: Clean the data
    # > Training: 5040 x 82
    s_clean = clean(X_train)
    X_train = X_train[:, s_clean]

    # Step 2: Mean-std normalization of the data
    X_train, a, b = normalize(X_train)

    # Step 3: Feature selection
    # The Fisher criterion was used here
    # > Training: 5040 x 26
    s_sfs = sfs(X_train, labels_train, n_features=26, method="fisher")
    X_train = X_train[:, s_sfs]

    # *** TESTING DATA DEFINITION ***
    X_test = X_test[:, s_clean]   # Step 1: Clean
    X_test = X_test * a + b       # Step 2: Normalization
    X_test = X_test[:, s_sfs]     # Step 3: SFS

    return classifier_tests(X_train, labels_train, X_test, labels_test)
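# sfs(..., method="fisher") ranks candidate features by the Fisher criterion:
# roughly, between-class scatter divided by within-class scatter. A minimal
# single-feature sketch of that score for the two-class case (the helper is
# hypothetical; pybalu's internal formulation may differ in detail):
def _fisher_score_sketch(x, labels):
    import numpy as np
    x0, x1 = x[labels == 0], x[labels == 1]
    between = (x0.mean() - x1.mean()) ** 2
    within = x0.var() + x1.var()
    return between / within if within > 0 else 0.0
# SFS then greedily adds, one at a time, the feature whose inclusion most
# improves this kind of separability score on the training set.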
def strategy01(X_train, labels_train, X_test, labels_test, groups):
    ''' Strategy #1 for the second classifier '''
    print('\nRunning strategy #1 of the second classifier...')

    # *** TRAINING DATA DEFINITION ***
    # Step 1: Clean the data
    # > Training: 8000 x 250
    s_clean = clean(X_train)
    X_train = X_train[:, s_clean]

    # Step 2: Mean-std normalization of the data
    X_train, a, b = normalize(X_train)

    # Step 3: Feature selection
    # The Fisher criterion was used here
    # > Training: 8000 x 50
    s_sfs = sfs(X_train, labels_train, n_features=50, method="fisher")
    X_train = X_train[:, s_sfs]

    # *** TESTING DATA DEFINITION ***
    X_test = X_test[:, s_clean]   # Step 1: Clean
    X_test = X_test * a + b       # Step 2: Normalization
    X_test = X_test[:, s_sfs]     # Step 3: SFS

    return classifier_tests(X_train, labels_train, X_test, labels_test, groups)
def WinnerStrategy(X_train, labels_train, X_test, labels_test, groups):
    ''' Strategy #1 with neural networks, rewritten to collect statistics '''

    # *** TRAINING DATA DEFINITION ***
    # Step 1: Clean the data
    # > Training: 8000 x 250
    s_clean = clean(X_train)
    X_train = X_train[:, s_clean]

    # Step 2: Mean-std normalization of the data
    X_train, a, b = normalize(X_train)

    # Step 3: Feature selection
    # The Fisher criterion was used here
    # > Training: 8000 x 50
    s_sfs = sfs(X_train, labels_train, n_features=50, method="fisher")
    X_train = X_train[:, s_sfs]

    # *** TESTING DATA DEFINITION ***
    X_test = X_test[:, s_clean]   # Step 1: Clean
    X_test = X_test * a + b       # Step 2: Normalization
    X_test = X_test[:, s_sfs]     # Step 3: SFS

    classifier = MLPClassifier(alpha=1, max_iter=1000, random_state=2)
    results = {}
    Y_pred = np.array([])
    # Rebuilt per sample below; the incoming per-patch labels are not used directly
    labels_test = np.array([])

    # Evaluate on the testing samples
    classifier.fit(X_train, labels_train)
    for sample in groups['test']:
        patch_data = np.array([])
        for patch in groups['test'][sample]:
            features = X_test[groups['test'][sample][patch], :].reshape(1, -1)
            patch_data = np.append(patch_data, classifier.predict(features)[0])
        # Predicted class: majority vote over the sample's patches
        Y_pred = np.append(Y_pred, stats.mode(patch_data)[0][0])
        labels_test = np.append(labels_test, get_class_by_name(sample))

    results['Accuracy'] = performance(Y_pred, labels_test) * 100
    results['Y_pred'] = Y_pred
    results['labels_test'] = labels_test
    return results
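# WinnerStrategy labels a whole test sample with the majority (mode) of its
# per-patch predictions. A toy sketch of that voting step; the
# stats.mode(patch_data)[0][0] indexing above assumes the older scipy API,
# where mode() returned arrays rather than scalars:
import numpy as np
from scipy import stats

patch_preds = np.array([1., 1., 0., 1., 0.])   # per-patch predicted classes
sample_pred = stats.mode(patch_preds)[0]       # majority class for the sample (1.)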
def strategy05(X_train, labels_train, X_test, labels_test, groups):
    ''' Strategy #5 for the second classifier '''
    print('\nRunning strategy #5 of the second classifier...')

    # *** TRAINING DATA DEFINITION ***
    # Step 1: Clean
    # > Training: 5040 x 82
    s_clean = clean(X_train)
    X_train = X_train[:, s_clean]

    # Step 2: PCA
    # > Training: 5040 x 82
    X_train, _, A1, Xm1, _ = pca(X_train, n_components=X_train.shape[1])

    # Step 3: Normalization
    # > Training: 5040 x 82
    X_train, a, b = normalize(X_train)

    # Step 4: SFS
    # > Training: 5040 x 80
    s_sfs = sfs(X_train, labels_train, n_features=80, method="fisher")
    X_train = X_train[:, s_sfs]
    X_train_sfs80 = X_train.copy()

    # Step 5: PCA
    # > Training: 5040 x 10
    X_train, _, A2, Xm2, _ = pca(X_train, n_components=10)

    # Step 6: SFS over the 10 PCA components concatenated with the 80 SFS features
    # > Training: 5040 x 20
    X_train = np.concatenate((X_train, X_train_sfs80), axis=1)
    s_sfs2 = sfs(X_train, labels_train, n_features=20, method="fisher")
    X_train = X_train[:, s_sfs2]

    # *** TESTING DATA DEFINITION ***
    X_test = X_test[:, s_clean]             # Step 1: Clean
    X_test = np.matmul(X_test - Xm1, A1)    # Step 2: PCA
    X_test = X_test * a + b                 # Step 3: Normalization
    X_test = X_test[:, s_sfs]               # Step 4: SFS
    X_test_sfs80 = X_test.copy()
    X_test = np.matmul(X_test - Xm2, A2)    # Step 5: PCA
    X_test = np.concatenate((X_test, X_test_sfs80), axis=1)
    X_test = X_test[:, s_sfs2]              # Step 6: SFS

    # *** TRAIN ON THE TRAINING DATA, TEST ON THE TESTING DATA ***
    return classifier_tests(X_train, labels_train, X_test, labels_test, groups)
def WinnerStrategy(X_train, labels_train, X_test, labels_test):
    ''' Strategy #1 with neural networks, rewritten to collect statistics '''

    # *** TRAINING DATA DEFINITION ***
    # Step 1: Clean the data
    # > Training: 5040 x 82
    s_clean = clean(X_train)
    X_train = X_train[:, s_clean]

    # Step 2: Mean-std normalization of the data
    X_train, a, b = normalize(X_train)

    # Step 3: Feature selection
    # The Fisher criterion was used here
    # > Training: 5040 x 50
    s_sfs = sfs(X_train, labels_train, n_features=50, method="fisher")
    X_train = X_train[:, s_sfs]

    # Step 4: PCA
    # > Training: 5040 x 50
    X_train, _, A, Xm, _ = pca(X_train, n_components=50)

    # *** TESTING DATA DEFINITION ***
    X_test = X_test[:, s_clean]           # Step 1: Clean
    X_test = X_test * a + b               # Step 2: Normalization
    X_test = X_test[:, s_sfs]             # Step 3: SFS
    X_test = np.matmul(X_test - Xm, A)    # Step 4: PCA

    classifier = MLPClassifier(alpha=1, max_iter=1000, random_state=2)
    results = {}

    # Classify the testing samples
    classifier.fit(X_train, labels_train)
    Y_pred = classifier.predict(X_test)
    accuracy = performance(Y_pred, labels_test)

    results['Accuracy'] = accuracy * 100
    results['Y_pred'] = Y_pred
    results['labels_test'] = labels_test
    return results
def strategy03(X_train, labels_train, X_test, labels_test, groups):
    ''' Strategy #3 for the second classifier '''
    print('\nRunning strategy #3 of the second classifier...')

    # *** TRAINING DATA DEFINITION ***
    # Step 1: Clean
    # > Training: 5040 x 82
    s_clean = clean(X_train)
    X_train = X_train[:, s_clean]

    # Step 2: Normalization
    # > Training: 5040 x 82
    X_train, a, b = normalize(X_train)

    # Step 3: SFS
    # > Training: 5040 x 80
    s_sfs = sfs(X_train, labels_train, n_features=80, method="fisher")
    X_train = X_train[:, s_sfs]

    # Step 4: PCA
    # > Training: 5040 x 20
    X_train, _, A, Xm, _ = pca(X_train, n_components=20)

    # Step 5: SFS
    # > Training: 5040 x 15
    s_sfs2 = sfs(X_train, labels_train, n_features=15, method="fisher")
    X_train = X_train[:, s_sfs2]

    # *** TESTING DATA DEFINITION ***
    X_test = X_test[:, s_clean]           # Step 1: Clean
    X_test = X_test * a + b               # Step 2: Normalization
    X_test = X_test[:, s_sfs]             # Step 3: SFS
    X_test = np.matmul(X_test - Xm, A)    # Step 4: PCA
    X_test = X_test[:, s_sfs2]            # Step 5: SFS

    # *** TRAIN ON THE TRAINING DATA, TEST ON THE TESTING DATA ***
    return classifier_tests(X_train, labels_train, X_test, labels_test, groups)
# > Testing : 53 x 1589
X_train, X_test, d_train, d_test = train_test_split(X, d, test_size=0.2, shuffle=False)

# *** TRAINING DATA DEFINITION ***
# Step 2-Training: Clean
# > Training: 211 x 387
s_clean = clean(X_train)
X_train = X_train[:, s_clean]

# Step 3-Training: Normalization
# > Training: 211 x 387
X_train, a, b = normalize(X_train)

# Step 4-Training: SFS
# > Training: 211 x 40
s_sfs = sfs(X_train, d_train, n_features=40, method="fisher", show=True)
X_train = X_train[:, s_sfs]

# Step 5-Training: PCA
# > Training: 211 x 10
X_train, _, A, Xm, _ = pca(X_train, n_components=10)

# *** TESTING DATA DEFINITION ***
X_test = X_test[:, s_clean]           # Step 2: Clean
X_test = X_test * a + b               # Step 3: Normalization
X_test = X_test[:, s_sfs]             # Step 4: SFS
X_test = np.matmul(X_test - Xm, A)    # Step 5: PCA (mirrors the training PCA,
                                      # needed so test data is also 10-dimensional)
data = loadmat("realdata") features = data["features"] classes = data["classes"].squeeze() # %% from pybalu.data_selection import stratify from pybalu.feature_transformation import normalize # Training and Testing data (90% training, 10% testing) idx_train, idx_test = stratify(classes, .90) f_train = features[idx_train] c_train = classes[idx_train] f_test = features[idx_test] c_test = classes[idx_test] f_train_norm, a, b = normalize(f_train) f_test_norm = f_test * a + b # %% from pybalu.feature_selection import sfs N_FEATURES = 15 selected_feats = sfs(f_train_norm, c_train, n_features=N_FEATURES, method="fisher", show=True) # %% from pybalu.classification import structure from pybalu.performance_eval import performance from sklearn.neighbors import KNeighborsClassifier
def main():
    ''' Main flow of the scratched-wall recognition program.

    The methodology follows the scheme described in:
    https://github.com/domingomery/patrones/blob/master/clases/Cap03_Seleccion_de_Caracteristicas/presentations/PAT03_GeneralSchema.pdf

    The code structure is based on the in-class example:
    https://github.com/domingomery/patrones/tree/master/clases/Cap03_Seleccion_de_Caracteristicas/ejercicios/PCA_SFS
    '''
    # Step 1: Feature extraction
    # > 4000 scratched training images
    # > 4000 non-scratched training images
    # > 1000 scratched testing images
    # > 1000 non-scratched testing images
    # > 357 features per image
    features = FeatureExtractor(classes=CLASSES)

    # Step 2: Training / testing data definition
    # > Training: 8000 x 357
    # > Testing: 2000 x 357
    X_train, labels_train = features['feature_values_train'], features[
        'labels_train']
    X_test, labels_test = features['feature_values_test'], features[
        'labels_test']
    X_train, labels_train = np.array(X_train), np.array(labels_train)
    X_test, labels_test = np.array(X_test), np.array(labels_test)

    # *** TRAINING DATA DEFINITION ***
    # Step 3: Clean the data
    # > Training: 8000 x 250
    s_clean = clean(X_train)
    X_train = X_train[:, s_clean]

    # Step 4: Mean-std normalization of the data
    X_train, a, b = normalize(X_train)

    # Step 5: Feature selection
    # The Fisher criterion was used here
    # > Training: 8000 x 50
    s_sfs = sfs(X_train, labels_train, n_features=50)
    X_train = X_train[:, s_sfs]

    # *** TESTING DATA DEFINITION ***
    X_test = X_test[:, s_clean]   # Step 3: Clean
    X_test = X_test * a + b       # Step 4: Normalization
    X_test = X_test[:, s_sfs]     # Step 5: SFS

    # *** TRAIN ON THE TRAINING DATA, TEST ON THE TESTING DATA ***
    knn = KNN(n_neighbors=3)
    knn.fit(X_train, labels_train)
    Y_pred = knn.predict(X_test)

    # *** Classifier statistics and performance ***
    accuracy = performance(Y_pred, labels_test)
    print("Accuracy = " + str(accuracy))
    confusionMatrix(Y_pred, labels_test)

    printChoosenFeatures = True
    if printChoosenFeatures:
        feature_names = np.array(features['feature_names'])
        feature_names = feature_names[s_sfs]
        print('The features selected by the system are:')
        for name in feature_names:
            print(name, end=' -- ')

    # *** Save the variables for the external recognizer ***
    with open('data/reconocedor.json', 'w') as file:
        file.write(
            json.dumps({
                's_clean': s_clean.tolist(),
                'a': a.tolist(),
                'b': b.tolist(),
                's_sfs': s_sfs.tolist()
            }))
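# A minimal sketch (not part of the original flow) of how the external
# recognizer could consume data/reconocedor.json: reload the training-derived
# parameters and replay the same clean -> normalize -> SFS chain on newly
# extracted features. The function name is hypothetical.
def apply_saved_recognizer_params(X_new, path='data/reconocedor.json'):
    with open(path) as file:
        params = json.load(file)
    X_new = X_new[:, params['s_clean']]                            # clean
    X_new = X_new * np.array(params['a']) + np.array(params['b'])  # normalize
    return X_new[:, params['s_sfs']]                               # SFS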
print('Training Subset:')
X_train = np.concatenate((X0_train, X1_train), axis=0)
d0_train = np.zeros([X0_train.shape[0], 1], dtype=int)
d1_train = np.ones([X1_train.shape[0], 1], dtype=int)
d_train = np.concatenate((d0_train, d1_train), axis=0)
print('Original extracted features: ' + str(X_train.shape[1]) +
      '(' + str(X_train.shape[0]) + ' samples)')

# Training: Cleaning
sclean = clean(X_train, show=True)
X_train_clean = X_train[:, sclean]
print('           cleaned features: ' + str(X_train_clean.shape[1]) +
      '(' + str(X_train_clean.shape[0]) + ' samples)')

# Training: Normalization
X_train_norm, a, b = normalize(X_train_clean)
print('        normalized features: ' + str(X_train_norm.shape[1]) +
      '(' + str(X_train_norm.shape[0]) + ' samples)')

# Training: Feature selection
ssfs = sfs(X_train_norm, d_train, n_features=20, method="fisher", show=True)
X_train_sfs = X_train_norm[:, ssfs]
print('          selected features: ' + str(X_train_sfs.shape[1]) +
      '(' + str(X_train_sfs.shape[0]) + ' samples)')

# Testing dataset
print('Testing Subset:')
X_test = np.concatenate((X0_test, X1_test), axis=0)
d0_test = np.zeros([X0_test.shape[0], 1], dtype=int)
d1_test = np.ones([X1_test.shape[0], 1], dtype=int)
d_test = np.concatenate((d0_test, d1_test), axis=0)
def clean_norm(X):
    ''' Clean the feature matrix and apply mean-std normalization, returning
    the transformed data plus the parameters needed to replay the same
    transformation on new samples. '''
    sclean = clean(X, show=True)
    X = X[:, sclean]
    X, a, b = normalize(X)
    return X, sclean, a, b
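# Typical usage: fit on training data, then replay the same clean indices and
# normalization parameters on held-out data (variable names are illustrative):
#
#   X_train_p, sclean, a, b = clean_norm(X_train)
#   X_test_p = X_test[:, sclean] * a + b    # same clean + normalization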