示例#1
0
from ex1.data import getData, plot_feature_importance, plot_tree


# Train a decision-tree classifier on the data returned by getData(), print
# its accuracy on the test split, and plot the tree and feature importances.
#
# NOTE(review): `tree`, `accuracy_score` and `GridSearchCV` are not imported
# in the visible part of this file; presumably they come from scikit-learn
# (sklearn.tree, sklearn.metrics, sklearn.model_selection) -- confirm.

# Each split is unpacked below as a (samples, labels) pair.
trainData, testData = getData(fraction=1, deskew=True, subtractMean=True)
trainSet, trainLabels = trainData
testSet, testLabels = testData


# Parameter grid search with cross-validation. Kept for reference only: per
# the original note, this search produced the values hard-coded in
# `best_params` below.
# tuned_parameters = {
#     'criterion': ['gini', 'entropy'],
#     'max_depth': [3, 5, 10, 30, 50, 70, 100],
#     'max_features': [3, 5, 50, 70, 100, 350, 500, 650, 784],
# }
#
# classifier = GridSearchCV(tree.DecisionTreeClassifier(), tuned_parameters, cv=5, n_jobs=-1)
# print(classifier.fit(trainSet, trainLabels))
# print(classifier.best_params_)
# predictions = classifier.predict(testSet)
# print(accuracy_score(testLabels, predictions))

# Best parameters from cross-validation.
# NOTE(review): no random_state is passed, so the fitted tree (and therefore
# the printed accuracy) may differ between runs -- confirm whether
# reproducibility matters here.
best_params = {'max_features': 500, 'criterion': 'entropy', 'max_depth': 100}
classifier = tree.DecisionTreeClassifier(**best_params)
# print(...) with a single argument behaves the same on Python 2 and 3,
# unlike the Python 2-only `print expr` statement. fit() output is printed
# as in the original script.
print(classifier.fit(trainSet, trainLabels))

predictions = classifier.predict(testSet)
# Ground-truth labels go first, predictions second (y_true, y_pred).
print(accuracy_score(testLabels, predictions))
plot_tree(classifier, 'tree')
plot_feature_importance(classifier, 'tree')
示例#2
0

# Train a random-forest classifier on the data returned by getData(), print
# its accuracy on the test split, and plot the feature importances.
#
# NOTE(review): `getData`, `plot_feature_importance`, `RandomForestClassifier`,
# `accuracy_score` and `GridSearchCV` are not imported in the visible part of
# this example; presumably they come from ex1.data and scikit-learn -- confirm.

# Each split is unpacked below as a (samples, labels) pair.
trainData, testData = getData(fraction=1, deskew=True, subtractMean=True)
trainSet, trainLabels = trainData
testSet, testLabels = testData


# Parameter grid search with cross-validation. Kept for reference only: per
# the original note, this search produced the values hard-coded in
# `best_params` below.
# tuned_parameters = {
#     'n_estimators': [2, 5, 10, 20, 30, 50, 70, 100],
#     'criterion': ['gini', 'entropy'],
#     'max_depth': [3, 5, 10, 30, 50, 70, 100],
#     'max_features': [3, 5, 50, 70, 100, 350, 500, 650, 784],
# }
#
# classifier = GridSearchCV(RandomForestClassifier(), tuned_parameters, cv=5, n_jobs=-1)
# print(classifier.fit(trainSet, trainLabels))
# print(classifier.best_params_)
# predictions = classifier.predict(testSet)
# print(accuracy_score(testLabels, predictions))

# Best parameters from cross-validation.
# NOTE(review): no random_state is passed, so the fitted forest (and
# therefore the printed accuracy) may differ between runs -- confirm whether
# reproducibility matters here.
best_params = {'max_features': 50, 'n_estimators': 100, 'criterion': 'entropy', 'max_depth': 10}
classifier = RandomForestClassifier(**best_params)
# print(...) with a single argument behaves the same on Python 2 and 3,
# unlike the Python 2-only `print expr` statement. fit() output is printed
# as in the original script.
print(classifier.fit(trainSet, trainLabels))

predictions = classifier.predict(testSet)
# Ground-truth labels go first, predictions second (y_true, y_pred).
print(accuracy_score(testLabels, predictions))
# Tree plotting was already disabled in the original script for this model.
# plot_tree(classifier, 'random')
plot_feature_importance(classifier, 'random')