# Our cvresults table (note, includes all individuals # with their mean, max, min, and std test score). out = pd.DataFrame( clf2.cv_results_ ) out = out.sort_values( "mean_test_score", ascending=False ) out.head() y_pred_gen = clf2.predict(X_test) accuracy_gen = accuracy_score(y_test, y_pred_gen) accuracy_gen # 81.88 % ####################### I am done ###################### """ How PermutationImportance works? Remove a feature only from the test part of the dataset, and compute score without using this feature. It doesn’t work as-is, because estimators expect feature to be present. So instead of removing a feature we can replace it with random noise - feature column is still there, but it no longer contains useful information. This method works if noise is drawn from the same distribution as original feature values (as otherwise estimator may fail).
# Hold out 20% of each city's data for evaluation.  Features and labels are
# split in separate train_test_split calls; using the same test_size and
# random_state keeps the row pairing consistent across the two calls.
#
# NOTE(review): the first split statement was truncated at this chunk's
# boundary (only "random_state=24)" survived); it is reconstructed here to
# match the pattern of the three splits below, since train_SJ_test is
# consumed by the prediction step further down — confirm against the
# original file.
train_SJ_train, train_SJ_test = train_test_split(
    train_SJ, test_size=0.2, random_state=24)
total_CASESJ_train, total_CASESJ_test = train_test_split(
    total_CASESJ, test_size=0.2, random_state=24)
train_IQ_train, train_IQ_test = train_test_split(
    train_IQ, test_size=0.2, random_state=24)
total_CASEIQ_train, total_CASEIQ_test = train_test_split(
    total_CASEIQ, test_size=0.2, random_state=24)

# BUG FIX: the original code fit the trees on the FULL feature/label arrays
# (train_SJ / total_CASESJ and train_IQ / total_CASEIQ) and then scored them
# on test splits drawn from that same data, so the reported MAE was computed
# on samples the models had already seen (data leakage).  Fit on the
# training splits only.
rtreeForSJ.fit(train_SJ_train, total_CASESJ_train)
rtreeForIQ.fit(train_IQ_train, total_CASEIQ_train)

# Score each city's model on its held-out split.
predictionsSJ = rtreeForSJ.predict(train_SJ_test)
predictionsIQ = rtreeForIQ.predict(train_IQ_test)

sjscore = mean_absolute_error(total_CASESJ_test, predictionsSJ)
iqscore = mean_absolute_error(total_CASEIQ_test, predictionsIQ)
print(sjscore)
print(iqscore)

# (Removed a block of commented-out scratch code that concatenated the two
# prediction arrays and read 'submission_format.csv' toward building a
# competition submission; it was never executed.)
}  # NOTE(review): closes the p_grid hyper-parameter dict whose opening lies
   # outside this chunk — this line is a fragment of that statement.

# Seed the RNG so the evolutionary search is reproducible.
random.seed(1)

# Evolutionary hyper-parameter search (sklearn-deap) over the MLP's
# parameter grid, using 4-fold stratified CV and accuracy as the fitness.
cv = EvolutionaryAlgorithmSearchCV(estimator=mlp,
                                   params=p_grid,
                                   scoring="accuracy",
                                   cv=StratifiedKFold(n_splits=4),
                                   verbose=1,
                                   population_size=5,        # individuals per generation
                                   gene_mutation_prob=0.10,
                                   gene_crossover_prob=0.5,
                                   tournament_size=3,        # selection pressure
                                   generations_number=5,
                                   n_jobs=4)
#clf = GridSearchCV(mlp, param_grid=p_grid, cv=5, scoring='accuracy')

# Run the search, then report the best CV score / parameters and the
# resulting accuracy on the held-out test set.
cv.fit(X_train, y_train)
print("VALIDATION score:" + str(cv.best_score_))
print("BEST parameters:" + str(cv.best_params_))
y_pred = cv.predict(X_test)
print("accuracy:" + str(accuracy_score(y_test, y_pred)))

"""
result:
VALIDATION score: 0.980712563622
BEST parameters: {'alpha': 1e-05, 'verbose': True, 'hidden_layer_sizes': (50,)}
accuracy: 0.975530179445
"""