# Model with dimensionality reduction through Isometric Mapping (not for regression)
# ----------------------------------------------------------------------------------
iso = manifold.Isomap()
# CV's result is consumed below as a (number of features, error, accuracy)
# triple by the %-format string.
best = CV(iso, x, y)
benchmark.append(best)
benchmark_names.append('Isomap')
print('Isomap \nNumber of features %d \nError %.3f \nAccuracy %.3f\n' % best)

# -------------------------------------------------------
# Model with feature selection based on genetic algorithm
# -------------------------------------------------------
gen = Genetic(features=x.columns.size,
              parents=5,
              children=20,
              mutation_scale=0.05)
# NOTE(review): the third positional argument (20) is presumably the number
# of generations -- confirm against Genetic.fit.
gen.fit(x, y, 20, modelCV)

# Build the result triple once so the benchmark entry and the printed report
# cannot drift apart: non-zero entries of the first row of gen.tab are counted
# as the number of features; the last row of gen.best supplies error/accuracy.
genetic_result = (np.count_nonzero(gen.tab[0]),
                  gen.best[-1][0],
                  gen.best[-1][1])
benchmark.append(genetic_result)
benchmark_names.append('Genetic algorithm')
print(
    'Genetic algorithm \nNumber of features %d \nError %.3f \nAccuracy %.3f\n'
    % genetic_result)

# Plotting results normalized to the result without feature selection or
# dimensionality reduction (the first benchmark entry).
# Force a float array and divide out of place: an in-place `/=` by a view of
# the array's own first row relies on NumPy's overlap handling and fails with
# a casting error if the array happens to be inferred as an integer dtype.
benchmark = np.array(benchmark, dtype=float)
benchmark = benchmark / benchmark[0]
benchmark = pd.DataFrame(benchmark,
                         columns=['number of features', 'loss', 'accuracy'])
benchmark['algorithm'] = benchmark_names
benchmark = benchmark.set_index('algorithm')
columns.append('seed_pseudorapidity')

# Log-transform the columns labelled 1 and 2. This happens before the
# descriptive names are assigned, so they are still addressed by integer label.
data[1] = np.log(data[1])
data[2] = np.log(data[2])
data.columns = columns

# Scale the features, then split into inputs (x) and target (y).
# NOTE(review): wrapping the scaler output in a fresh DataFrame presumably
# drops the column names assigned above (default integer labels) -- confirm
# this is intended.
sc = preprocessing.StandardScaler()
data = pd.DataFrame(sc.fit_transform(data))
x = pd.DataFrame(np.array(data))
# The target is taken from the first column of the original (unscaled) data.
y = pd.DataFrame(np.array(data_org)[:, 0])

########################################################
# Model with feature selection based on genetic algorithm
########################################################
gen = Genetic(
    features=x.columns.size,
    parents=5,
    children=30,
    mutation_scale=0.05,
)
# NOTE(review): the third positional argument (10) is presumably the number
# of generations -- confirm against Genetic.fit.
gen.fit(x, y, 10, modelCV)

print('\n\nGenetic algorithm\nNumber of features %d \nError %.3f \nAccuracy %.3f\n'
      % (np.count_nonzero(gen.tab[0]), gen.best[-1][0], gen.best[-1][1]))
print('\n============================\n')
print('Set of features from last generation')
print(gen.tab)
print('\n-----------\n')
print('Results of features from last generation (loss)')
print(gen.results)

# Plotting results of dimensionality reduction: first column of the best and
# mean histories (the same column reported as "Error" above).
plt.plot(gen.best.T[0], 'b-')
plt.plot(gen.mean_.T[0], 'r-')
plt.legend(['best', 'mean'])
# Title fixed: it previously read 'Generic algorithm' (typo for "Genetic").
plt.title('Genetic algorithm')
plt.xlabel('generation')