# Grid-search GradientBoostingClassifier hyper-parameters, re-score the best
# estimator with 5-fold CV, write its predictions, and append a run log.
(X, Y, Xt) = boilerplate.loadData()

clf = GradientBoostingClassifier()
params = {"loss": ['deviance'],  # "loss": ['deviance', 'exponential']
          "n_estimators": [180, 200, 220, 240, 260, 280],
          "max_depth": [12, 18, 24, 30],
          "max_features": [15, 30, 45, 60],
          "min_samples_split": [1],
          "min_samples_leaf": [3, 10],
          "verbose": [1]}

search = GridSearchCV(clf, param_grid=params, n_jobs=-1)
search.fit(X, Y)
best = search.best_estimator_

# Independent 5-fold CV score for the winning estimator.
scores = cross_validation.cross_val_score(best, X, Y, cv=5)
boilerplate.writeData('predictionsMGS_GB2.csv', best, Xt)

with open('Log.txt', 'a') as f:
    f.write('GridSearch_GradientBoost run at: ' + time.strftime("%H:%M:%S")
            + '\n')
    f.write('Status of GridSearch_GradientBoost:\n')
    f.write('Best GradientBoost in search has score:\n')
    f.write(str(sum(scores) / len(scores)) + '\n')
    f.write('The parameters of the best estimator:\n')
    # BUG FIX: get_params is a method — call it; the original logged only
    # the bound-method repr, not the parameter dict.
    f.write(str(best.get_params()) + '\n')
    f.write('Done. Time taken (seconds):\n')
    # `start` is set earlier in the file (outside this chunk).
    f.write(str(time.time() - start) + '\n')

print('Best Gradient Boost Classifier in search has score:')
print(sum(scores) / len(scores))
print('The parameters of the best estimator:')
use_idf=True, smooth_idf=False, sublinear_tf=True) X = trf.fit_transform(X) Xt = trf.transform(Xt) clf = SVC() params = {"C": [0.1 * np.e**i for i in range(10)], "kernel": ['rbf', 'linear', 'poly', 'sigmoid'], "shrinking": [True, False], "tol": [1e-3, 1e-4, 1e-5, 1e-6], "verbose": [True]} search = GridSearchCV(clf, param_grid=params, n_jobs=-1) search.fit(X, Y) best = search.best_estimator_ scores = cross_validation.cross_val_score(best, X, Y, cv=5) boilerplate.writeData('predictionsGS_SVM3.csv', best, Xt) with open('Log.txt', 'a') as f: f.write('--------------------------------------\n') f.write('GridSearch_TFIDF_SVC run at: ' + time.strftime("%H:%M:%S") \ + '\n') f.write('Status of GridSearch_TFIDF_SVC:\n') f.write('Best SVC in search has score:\n') f.write(str(sum(scores) / len(scores)) + '\n') f.write('The parameters of the best estimator:\n') f.write(str(best.get_params) + '\n') f.write('Done. Time taken (seconds):\n') f.write(str(time.time() - start) + '\n') print 'Best SVC in search has score:' print sum(scores) / len(scores)
# Grid-search an elastic-net SGDClassifier, re-score the best estimator
# with 5-fold CV, write predictions, and append a run log.
(X, Y, Xt) = boilerplate.loadData()
# NOTE(review): predictions are written to a .txt log-style filename —
# looks unintended (a .csv name is used elsewhere); confirm before relying
# on the output.
filename = "LogSGD.txt"

clf = SGDClassifier()
params = {"loss": ["log"],
          "penalty": ["elasticnet"],
          "l1_ratio": [0.005 * i for i in range(5)],
          "alpha": [0.002 * i for i in range(1, 11)],
          "n_iter": [50 * i for i in range(1, 9)]}

search = GridSearchCV(clf, param_grid=params, n_jobs=-1)
search.fit(X, Y)
best = search.best_estimator_

# Independent 5-fold CV score for the winning estimator.
scores = cross_validation.cross_val_score(best, X, Y, cv=5)
boilerplate.writeData(filename, best, Xt)

with open('LogSGD.txt', 'a') as f:
    f.write('--------------------------------------\n')
    f.write('GridSearch_SGD run at: ' + time.strftime("%H:%M:%S")
            + '\n')
    f.write('Status of GridSearch_SGD:\n')
    f.write('Saved to: ' + filename + '\n')
    f.write('Best SGD in search has score:\n')
    f.write(str(sum(scores) / len(scores)) + '\n')
    f.write('The parameters of the best estimator:\n')
    # BUG FIX: call get_params() — the original logged the method object.
    f.write(str(best.get_params()) + '\n')
    f.write('Done. Time taken (seconds):\n')
    # `start` is set earlier in the file (outside this chunk).
    f.write(str(time.time() - start) + '\n')

# Single-argument print(...) is valid in both Python 2 and 3.
print('Best SGD in search has score:')
"min_samples_leaf": sp_randint(1, 11), "bootstrap": [True, False], "criterion": ["gini"], "verbose": [1], "warm_start": [True, False]} # Run randomized search n_iters = 3 random_search = RandomizedSearchCV(clf, param_distributions=param_dist, cv=5, \ n_iter=n_iters, n_jobs=-1, verbose=1) random_search.fit(X, Y) report(random_search.grid_scores_) best = random_search.best_estimator_ scores = cross_validation.cross_val_score(best, X, Y, cv=5) boilerplate.writeData('predictionsRGS_RF1.csv', best, Xt) with open('Log.txt', 'a') as f: f.write('--------------------------------------\n') f.write('RandomGridSearch_RandomForest run at: ' + time.strftime("%H:%M:%S") \ + '\n') f.write('Status of RandomGridSearch_RandomForest:\n') f.write('Best Random Forest in search has score:\n') f.write(str(sum(scores) / len(scores)) + '\n') f.write('The parameters of the best estimator:\n') f.write(str(best.get_params) + '\n') f.write('Done. Time taken (seconds):\n') f.write(str(time.time() - start) + '\n') print 'Best Random Forest in search has score:' print sum(scores) / len(scores)
"warm_start": [True, False]} ''' params = {"n_estimators": [140, 160, 180, 200, 220], "max_depth": [30, 35, 40, 55, 60], "max_features": [60, 70, 80], "min_samples_split": [1, 2, 4, 8, 16], "min_samples_leaf": [1, 2, 4, 8, 16], "bootstrap": [True], "criterion": ["gini"], "verbose": [1]} ''' search = GridSearchCV(clf, param_grid=params, n_jobs=-1) search.fit(X, Y) best = search.best_estimator_ scores = cross_validation.cross_val_score(best, X, Y, cv=5) boilerplate.writeData('predictionsMGS_TFIDF_RF3.csv', best, Xt) with open('Log.txt', 'a') as f: f.write('--------------------------------------\n') f.write('GridSearch_RandomForest run at: ' + time.strftime("%H:%M:%S") \ + '\n') f.write('Status of GridSearch_RandomForest:\n') f.write('Best Random Forest in search has score:\n') f.write(str(sum(scores) / len(scores)) + '\n') f.write('The parameters of the best estimator:\n') f.write(str(best.get_params) + '\n') f.write('Done. Time taken (seconds):\n') f.write(str(time.time() - start) + '\n') print 'Best Random Forest in search has score:' print sum(scores) / len(scores)