def battery(labels, unscaled_features, features_list, my_data):
    """Grid-search an SVM, a decision tree and a random forest, then score each.

    Args:
        labels: Target values passed to ``run_algorithm`` with the features.
        unscaled_features: Raw feature matrix. The decision tree and random
            forest searches receive it as-is; the SVM search receives a
            min-max scaled copy.
        features_list: Feature names; printed and forwarded to ``run_test``.
        my_data: Data set forwarded to ``run_test``; its type is printed.

    Returns:
        Tuple ``(SVM_score, DT_score, RF_score)`` containing whatever
        ``run_test`` returns for each list of estimator steps.
    """
    # SET UP GRID SEARCH PARAMETERS AND MODELS
    # The SVM search runs on min-max scaled features.
    # NOTE(review): the scaler is fitted on every row before the search; if
    # run_algorithm cross-validates, fold statistics leak - confirm acceptable.
    scaler = preprocessing.MinMaxScaler()
    scaled_features = scaler.fit_transform(unscaled_features)

    # Ranges of C, gamma and kernel to try for the SVM.
    # NOTE(review): gamma is swept for the 'linear' kernel too, where SVC does
    # not use it, so those grid points repeat the same fit.
    C_range = np.logspace(-2, 5, 8)
    gamma_range = np.logspace(-5, 2, 8)
    parametersSVM = {
        'kernel': ('linear', 'rbf'),
        'C': C_range,
        'gamma': gamma_range,
    }

    # Ranges of max depth, min samples split and criterion for the tree.
    max_depth = range(2, 20, 2)
    min_samples_split = range(2, 10, 2)
    parametersDT = {
        'criterion': ('gini', 'entropy'),
        'max_depth': max_depth,
        'min_samples_split': min_samples_split,
    }

    # Range of estimator counts and criterion for the random forest.
    n_estimators = range(10, 100, 10)
    parametersRF = {
        'n_estimators': n_estimators,
        'criterion': ('gini', 'entropy'),
    }

    # Single-argument print(...) emits the same text under the Python 2 print
    # statement (used by this file) and the Python 3 print function; the join
    # reproduces the space that `print a, b` inserts between its operands.
    print(" ".join(("CURRENT FEATURES: ", str(features_list))))

    dt = tree.DecisionTreeClassifier()
    rf = RandomForestClassifier()
    svr = svm.SVC()

    # Run the search for each model; presumably run_algorithm returns the
    # tuned estimator - verify against its definition.
    print("SVM")
    SVM = run_algorithm(svr, parametersSVM, scaled_features, labels)
    print("DT")
    DT = run_algorithm(dt, parametersDT, unscaled_features, labels)
    print("RF")
    RF = run_algorithm(rf, parametersRF, unscaled_features, labels)

    # (name, estimator) steps handed to run_test. The SVM steps start with a
    # fresh scaler because that model was searched on scaled features.
    scaling = preprocessing.MinMaxScaler()
    estimators_SVM = [('scaling', scaling), ('algorithm', SVM)]
    estimators_DT = [('algorithm', DT)]
    estimators_RF = [('algorithm', RF)]

    print(" ".join(("CURRENT FEATURES: ", str(features_list))))
    print(type(my_data))

    SVM_score = run_test(estimators_SVM, my_data, features_list)
    DT_score = run_test(estimators_DT, my_data, features_list)
    RF_score = run_test(estimators_RF, my_data, features_list)
    return SVM_score, DT_score, RF_score
def battery(labels, unscaled_features, features_list, my_data):
    """Tune three classifiers by grid search and return their test scores.

    An SVM, a decision tree and a random forest are each passed to
    ``run_algorithm`` with a parameter grid, and the result of each search is
    then passed to ``run_test``.

    Args:
        labels: Target values given to ``run_algorithm``.
        unscaled_features: Raw feature matrix; used directly for the tree
            models and min-max scaled first for the SVM.
        features_list: Feature names; printed and passed to ``run_test``.
        my_data: Data set passed to ``run_test``; its type is printed.

    Returns:
        ``(SVM_score, DT_score, RF_score)`` - the three ``run_test`` results.
    """
    # SET UP GRID SEARCH PARAMETERS AND MODELS
    # Scaled copy of the features, used only for the SVM search.
    # NOTE(review): scaling happens once on the full matrix, before any
    # splitting run_algorithm may do - confirm that is intended.
    scaler = preprocessing.MinMaxScaler()
    scaled_features = scaler.fit_transform(unscaled_features)

    # SVM grid: C, gamma and kernel.
    # NOTE(review): 'gamma' has no effect on SVC's 'linear' kernel, so the
    # linear half of this grid repeats identical fits for every gamma value.
    C_range = np.logspace(-2, 5, 8)
    gamma_range = np.logspace(-5, 2, 8)
    parametersSVM = {'kernel': ('linear', 'rbf'),
                     'C': C_range,
                     'gamma': gamma_range}

    # Decision tree grid: criterion, max depth and min samples split.
    max_depth = range(2, 20, 2)
    min_samples_split = range(2, 10, 2)
    parametersDT = {'criterion': ('gini', 'entropy'),
                    'max_depth': max_depth,
                    'min_samples_split': min_samples_split}

    # Random forest grid: number of estimators and criterion.
    n_estimators = range(10, 100, 10)
    parametersRF = {'n_estimators': n_estimators,
                    'criterion': ('gini', 'entropy')}

    # print(...) with one argument produces identical output as a Python 2
    # statement and as a Python 3 function call; joining with " " matches
    # the separator that `print a, b` writes between its operands.
    print(" ".join(("CURRENT FEATURES: ", str(features_list))))

    dt = tree.DecisionTreeClassifier()
    rf = RandomForestClassifier()
    svr = svm.SVC()

    # One search per model; presumably each call returns the tuned
    # estimator - verify against run_algorithm.
    print("SVM")
    SVM = run_algorithm(svr, parametersSVM, scaled_features, labels)
    print("DT")
    DT = run_algorithm(dt, parametersDT, unscaled_features, labels)
    print("RF")
    RF = run_algorithm(rf, parametersRF, unscaled_features, labels)

    # Step lists for run_test. Only the SVM gets a scaling step, matching the
    # scaled input it was searched on.
    scaling = preprocessing.MinMaxScaler()
    estimators_SVM = [('scaling', scaling), ('algorithm', SVM)]
    estimators_DT = [('algorithm', DT)]
    estimators_RF = [('algorithm', RF)]

    print(" ".join(("CURRENT FEATURES: ", str(features_list))))
    print(type(my_data))

    SVM_score = run_test(estimators_SVM, my_data, features_list)
    DT_score = run_test(estimators_DT, my_data, features_list)
    RF_score = run_test(estimators_RF, my_data, features_list)
    return SVM_score, DT_score, RF_score
# SCALE REDUCED DATA
# Scales the data set that has the reduced number of features.
# NOTE(review): reduced_features is not read again in this section (the forest
# below is searched on the unscaled features); kept in case later code uses it.
scaler2 = preprocessing.MinMaxScaler()
reduced_features = scaler2.fit_transform(reduced_unscaled_features)

# SET UP GRID SEARCH PARAMETERS AND MODELS
# Range of estimator counts and criterion to try for the random forest.
n_estimators = range(10, 100, 10)
parametersRF = {'n_estimators': n_estimators, 'criterion': ('gini', 'entropy')}

# Creates the random forest classifier.
rf = RandomForestClassifier()

# Runs the search with the selected model and the reduced, unscaled features.
# Single-argument print(...) outputs the same text under the Python 2 print
# statement and the Python 3 print function.
print("RF")
RF = run_algorithm(rf, parametersRF, reduced_unscaled_features, reduced_labels)

# Set up the estimator steps so that the whole list can be passed to the grader.
# NOTE(review): `scaling` is not part of estimators_RF and is unused in this
# section; kept in case later code reads it.
scaling = preprocessing.MinMaxScaler()
estimators_RF = [('algorithm', RF)]

print("Reduced RF")
RRF = run_test(estimators_RF, my_dataset, reduced_features_list)

# Hands the model, data and selected features to dump_classifier_and_data
# (pickling, per the original comment).
# NOTE(review): the model was scored against my_dataset but data_dict is
# dumped here - confirm both hold the features in reduced_features_list.
dump_classifier_and_data(RF, data_dict, reduced_features_list)