# # Start QDA Classification
# print "Performing QDA Classification:"
# from sklearn.qda import QDA
# clf = QDA(priors=None, reg_param=0.001).fit(X_cropped, np.ravel(y_cropped[:]))
# y_validation_predicted = clf.predict(X_validation)
# print "Error rate for QDA (Validation): ", ml_aux.get_error_rate(y_validation,y_validation_predicted)

# Start Random Forest Classification
# Fits a 500-estimator random forest on X_cropped / y_cropped, reports the
# error rate on the validation split via ml_aux.get_error_rate, and pickles
# the fitted model to 't5_random_forest.pkl'.
print("Performing Random Classification:")
from sklearn.ensemble import RandomForestClassifier
forest = RandomForestClassifier(n_estimators=500)
# np.ravel hands the estimator a flat 1-D array of targets.
forest = forest.fit(X_cropped, np.ravel(y_cropped[:]))
y_validation_predicted = forest.predict(X_validation)
# "%s %s" reproduces the spacing of the Python 2 statement
# `print label, value` while staying valid on Python 3.
print("%s %s" % ("Error rate for Random Forest (Validation): ",
                 ml_aux.get_error_rate(y_validation, y_validation_predicted)))
# ml_aux.plot_confusion_matrix(y_validation, y_validation_predicted, "CM Random Forest (t1)")
# plt.show()

# Persist the fitted model. The context manager guarantees the file is
# flushed and closed even if pickling raises; the previous bare open(...)
# passed straight to pickle.dump left the handle to the garbage collector.
with open('t5_random_forest.pkl', 'wb') as model_file:
    pickle.dump(forest, model_file)

# # Start k nearest neighbor Classification
# print "Performing kNN Classification:"
# from sklearn import neighbors
# knn_model = neighbors.KNeighborsClassifier(n_neighbors=2, algorithm='auto',leaf_size=15)
# knn_model.fit(X_cropped, y_cropped)
# # y_train_predicted = knn_model.predict(X_train)
# # print "Error Rate for kNN (Cropped): ", ml_aux.get_error_rate(y_train, y_train_predicted)
# # y_validation_predicted = knn_model.predict(X_validation)
# Build the feature matrix by dropping the Genre, Song ID and Track ID
# columns from df_merged.
# NOTE(review): `y` and `adaboost_model` are not defined in this chunk;
# presumably y holds the Genre labels -- confirm against the code above.
X = df_merged.drop(["Genre", "Song ID", "Track ID"], axis=1)

# Split
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Train
adaboost_model.fit(X_train, y_train)

# Predict
y_train_predicted = adaboost_model.predict(X_train)
y_test_predicted = adaboost_model.predict(X_test)

# Report sample counts and the fraction of correctly classified samples.
# "%s %s" reproduces the spacing of the Python 2 statement
# `print label, value` while staying valid on Python 3.
print("%s %s" % ("Number of Train Samples: ", y_train.shape[0]))
print("%s %s" % ("Number of Test Samples: ", y_test.shape[0]))

train_hits = sum(y_train_predicted == y_train)
print("%s %s" % ("Train Classification Rate: ",
                 train_hits / float(y_train.shape[0])))
test_hits = sum(y_test_predicted == y_test)
print("%s %s" % ("Test Classification Rate: ",
                 test_hits / float(y_test.shape[0])))

# Per-class sample counts for each split, as computed by ml_aux.
print(ml_aux.getUniqueCount(y_train))
print(ml_aux.getUniqueCount(y_test))

print("%s %s" % ("try func: ",
                 ml_aux.get_error_rate(y_train, y_train_predicted)))

# Confusion matrices for both splits.
# NOTE(review): only the "Train" call has its return value printed; the
# "Test" call does not -- confirm whether the print is intentional.
print(ml_aux.plot_confusion_matrix(y_train, y_train_predicted, "Train"))
plt.show()
ml_aux.plot_confusion_matrix(y_test, y_test_predicted, "Test")
plt.show()
# Crop the dataset
# crop_rock derives a threshold from df_train_toCrop and drops the excess
# rows; the result is then split into the "Genre" target and the features.
maxval = crop_rock.find_second_max_value(df_train_toCrop)
df_cropped = crop_rock.drop_excess_rows(df_train_toCrop, maxval)
y_cropped = df_cropped["Genre"]
X_cropped = df_cropped.drop(["Genre"], axis=1)

# Start LDA Classification
print("Performing LDA Classification:")
from sklearn.lda import LDA
# Use X_cropped to get best model
lda_targets = np.ravel(y_cropped[:])
clf = LDA(
    solver="svd",
    shrinkage=None,
    n_components=None,
).fit(X_cropped, lda_targets)

# Evaluate the fitted LDA model on the training split.
# "%s %s" reproduces the spacing of the Python 2 statement
# `print label, value` while staying valid on Python 3.
y_train_predicted = clf.predict(X_train)
lda_train_error = ml_aux.get_error_rate(y_train, y_train_predicted)
print("%s %s" % ("Error rate for LDA on Training: ", lda_train_error))
# ml_aux.plot_confusion_matrix(y_cropped, predicted, "CM on LDA cropped")
# plt.show()

# Evaluate the same model on the validation split.
y_validation_predicted = clf.predict(X_validation)
lda_validation_error = ml_aux.get_error_rate(y_validation, y_validation_predicted)
print("%s %s" % ("Error rate for LDA on Validation: ", lda_validation_error))
# ml_aux.plot_confusion_matrix(y_validation, y_validation_predicted, "CM on LDA validation (t1)")
# plt.show()

# Start Adaboost Classification
from sklearn.ensemble import AdaBoostClassifier
# Construct and fit in one expression; adaboost_model is bound to the
# value returned by fit(), exactly as before.
adaboost_model = AdaBoostClassifier(n_estimators=50).fit(X_cropped, y_cropped)
# predicted = adaboost_model.predict(X_cropped)