SVC(kernel="linear", C=0.025), SVC(gamma=2, C=1), DecisionTreeClassifier(max_depth=5), RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1), AdaBoostClassifier(), GaussianNB(), ExtraTreesClassifier() ] #iteration over various classifiers for name, clf in zip(names, classifiers): clf.fit(features_train, labels_train) scores = clf.score(features_test, labels_test) print " " print "Classifier:" evaluate.evaluate_clf(clf, features, labels, num_iters=1000, test_size=0.3) print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2)) print "=====================================================================" #Model tuning with grid_search.GridSearchCV ##Define Scoring and Cross Validation from sklearn import grid_search from sklearn.tree import DecisionTreeClassifier cv = sklearn.cross_validation.StratifiedShuffleSplit(labels, n_iter=10) def scoring(estimator, features_test, labels_test): labels_pred = estimator.predict(features_test) p = sklearn.metrics.precision_score(labels_test, labels_pred,
### Candidate classifiers
# The four estimators below are instantiated here but are not evaluated in
# this section; only l_clf and k_clf (both defined earlier in the script)
# are passed to evaluate.evaluate_clf further down.

### Adaboost Classifier
from sklearn.ensemble import AdaBoostClassifier
a_clf = AdaBoostClassifier(algorithm='SAMME')

### Support Vector Machine Classifier
from sklearn.svm import SVC
s_clf = SVC(kernel='rbf', C=1000)

### Random Forest
from sklearn.ensemble import RandomForestClassifier
rf_clf = RandomForestClassifier()

### Stochastic Gradient Descent - Logistic Regression
from sklearn.linear_model import SGDClassifier
g_clf = SGDClassifier(loss='log')

### Selected Classifiers Evaluation
evaluate.evaluate_clf(l_clf, features, labels)
evaluate.evaluate_clf(k_clf, features, labels)

### Final Machine Algorithm Selection
clf = l_clf

# dump your classifier, dataset and features_list so
# anyone can run/check your results
#
# The files are opened in binary mode because pickle produces a byte stream
# (text mode can translate newlines on Windows and is rejected outright by
# Python 3), and inside `with` blocks so each handle is flushed and closed
# as soon as its dump completes.
with open("../data/my_classifier.pkl", "wb") as clf_file:
    pickle.dump(clf, clf_file)
with open("../data/my_dataset.pkl", "wb") as dataset_file:
    pickle.dump(my_dataset, dataset_file)
with open("../data/my_feature_list.pkl", "wb") as feature_list_file:
    pickle.dump(my_feature_list, feature_list_file)
SVC(kernel="linear", C=0.025), SVC(gamma=2, C=1), DecisionTreeClassifier(max_depth=5), RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1), AdaBoostClassifier(), GaussianNB(), ExtraTreesClassifier()] # iterate over classifiers for name, clf in zip(names, classifiers): clf.fit(features_train,labels_train) scores = clf.score(features_test,labels_test) print " " print "Classifier:" evaluate.evaluate_clf(clf, features, labels, num_iters=1000, test_size=0.3) print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2)) print "=====================================================================" #Model tuning using grid_search.GridSearchCV ##define cv and scoring from sklearn import grid_search from sklearn.tree import DecisionTreeClassifier cv = sklearn.cross_validation.StratifiedShuffleSplit(labels, n_iter=10) def scoring(estimator, features_test, labels_test): labels_pred = estimator.predict(features_test) p = sklearn.metrics.precision_score(labels_test, labels_pred, average='micro') r = sklearn.metrics.recall_score(labels_test, labels_pred, average='micro') if p > 0.3 and r > 0.3:
# K-means with two clusters and a convergence tolerance of 0.001.
# NOTE(review): KMeans is a clustering estimator, yet it is scored below with
# the same evaluate.evaluate_clf helper as the classifiers -- confirm that the
# helper copes with cluster ids that need not line up with the class labels.
k_clf = KMeans(n_clusters=2, tol=0.001)

### Candidate classifiers
# The four estimators below are instantiated here but are not evaluated in
# this section; only l_clf (defined earlier in the script) and k_clf are
# passed to evaluate.evaluate_clf further down.

### Adaboost Classifier
from sklearn.ensemble import AdaBoostClassifier
a_clf = AdaBoostClassifier(algorithm='SAMME')

### Support Vector Machine Classifier
from sklearn.svm import SVC
s_clf = SVC(kernel='rbf', C=1000)

### Random Forest
from sklearn.ensemble import RandomForestClassifier
rf_clf = RandomForestClassifier()

### Stochastic Gradient Descent - Logistic Regression
from sklearn.linear_model import SGDClassifier
g_clf = SGDClassifier(loss='log')

### Selected Classifiers Evaluation
evaluate.evaluate_clf(l_clf, features, labels)
evaluate.evaluate_clf(k_clf, features, labels)

### Final Machine Algorithm Selection
clf = l_clf

# dump your classifier, dataset and features_list so
# anyone can run/check your results
#
# The files are opened in binary mode because pickle produces a byte stream
# (text mode can translate newlines on Windows and is rejected outright by
# Python 3), and inside `with` blocks so each handle is flushed and closed
# as soon as its dump completes.
with open("../data/my_classifier.pkl", "wb") as clf_file:
    pickle.dump(clf, clf_file)
with open("../data/my_dataset.pkl", "wb") as dataset_file:
    pickle.dump(my_dataset, dataset_file)
with open("../data/my_feature_list.pkl", "wb") as feature_list_file:
    pickle.dump(my_feature_list, feature_list_file)