from sklearn import tree, ensemble, datasets

import util


if __name__ == "__main__":
    # Hyper-parameter grid for the boosted ensemble: number of boosting
    # rounds and the learning rate applied to each round.
    param_grid = {
        "n_estimators": [1, 5, 10, 15, 30, 40, 50],
        "learning_rate": [.1, .5, 1.],
    }

    breast_cancer = datasets.load_breast_cancer()
    features, labels = breast_cancer.data, breast_cancer.target

    # Positional 60/40 split in dataset order (no shuffling is applied).
    offset = int(0.6 * len(features))
    X_train, X_test = features[:offset], features[offset:]
    Y_train, Y_test = labels[:offset], labels[offset:]

    # Base learner: an entropy-criterion decision tree capped at depth 15
    # and 20 leaf nodes.
    dt = tree.DecisionTreeClassifier(
        criterion='entropy',
        max_depth=15,
        max_leaf_nodes=20,
        min_samples_split=2,
        min_samples_leaf=1,
    )

    # AdaBoost wraps the tree; the grid above targets the ensemble's own
    # parameters rather than the tree's.
    classifier = ensemble.AdaBoostClassifier(dt)

    # Only the training portion is passed to the search; X_test / Y_test are
    # not used by this call.
    # NOTE(review): cv=10 is presumably the number of cross-validation
    # folds -- confirm against util.run_gridsearch.
    ts_gs = util.run_gridsearch(X_train, Y_train, classifier, param_grid, cv=10)
import pandas as pd
from sklearn import preprocessing, ensemble, tree

import util


if __name__ == "__main__":
    df = pd.read_csv("letter-recognition.csv")
    dft = df  # alias of the same DataFrame object, not a copy

    # set of parameters to test
    param_grid = {
        "n_estimators": [1, 5, 10, 15, 30, 40, 50],
        "learning_rate": [.1, .5, 1.],
    }

    # Positional 70/30 split in file order (no shuffling is applied).
    offset = int(0.7 * len(df))

    # Column 0 is used as the target and the remaining columns as features.
    #
    # `.iloc` replaces the former `.ix` indexer, which was removed in
    # pandas 1.0. `.iloc` is purely positional and end-exclusive, so row
    # `offset` belongs to the test partition only; the old label-based
    # `.ix[:offset]` slice was end-inclusive on the default integer index and
    # placed that row in both partitions.
    lr_data_train = preprocessing.normalize(dft.iloc[:offset, 1:])
    lr_target_train = dft.iloc[:offset, 0]
    lr_data_test = preprocessing.normalize(dft.iloc[offset:, 1:])
    lr_target_test = dft.iloc[offset:, 0]

    # Base learner: an entropy-criterion decision tree capped at depth 20.
    classifier = tree.DecisionTreeClassifier(
        criterion='entropy',
        max_depth=20,
        min_samples_leaf=1,
    )

    # Note the naming: `dt` is the AdaBoost ensemble wrapping `classifier`,
    # and the grid above targets the ensemble's own parameters.
    dt = ensemble.AdaBoostClassifier(classifier)

    # Only the training portion is passed to the search; the test partition
    # is not used by this call.
    # NOTE(review): cv=10 is presumably the number of cross-validation
    # folds -- confirm against util.run_gridsearch.
    ts_gs = util.run_gridsearch(lr_data_train, lr_target_train, dt, param_grid, cv=10)