def main(maxFeatures=30, maxDepth=8):
    """Fit a gradient-boosting classifier, validate it, and write test predictions.

    Builds a ``GradientBoostingClassifier`` from a fixed parameter set (only
    ``max_features`` and ``max_depth`` are tunable here), runs a 2-fold
    validation on the training metrics, then scores the test metrics and
    writes the submission file. All paths are resolved under
    ``globalConst.BASE_DIR``.

    Args:
        maxFeatures: Value passed to the classifier as ``max_features``.
        maxDepth: Value passed to the classifier as ``max_depth``.
    """
    # Function-scope import keeps this block self-contained.
    import os

    # Single-argument print(...) produces the same text on Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print("maxFeatures: %s" % (maxFeatures,))
    print("maxDepth : %s" % (maxDepth,))

    baseDir = globalConst.BASE_DIR

    def underBase(relPath):
        # os.path.join inserts a separator only when needed, so the result is
        # correct whether or not BASE_DIR ends with a slash. relPath must stay
        # relative: an absolute second component would discard baseDir.
        return os.path.join(baseDir, relPath)

    params = {
        'max_depth': maxDepth,
        'subsample': 0.5,
        'verbose': 2,
        'random_state': 0,
        'min_samples_split': 20,
        'min_samples_leaf': 20,
        'max_features': maxFeatures,
        'n_estimators': 500,
        'learning_rate': 0.05,
    }
    # Alternative setting left by the author:
    # 'n_estimators': 12000, 'learning_rate': 0.002
    clf = GradientBoostingClassifier(**params)

    # NOTE: first pass, no orderFile; 2nd pass, use orderfiles.
    # useIfExists presumably yields the path only when the file is present --
    # TODO confirm against its definition.
    test = Classify(
        trainFile=underBase('workspace/trainMetrics.csv'),
        orderFile=useIfExists(underBase('moby/corr32.csv')),
    )
    test.validate(
        clf=clf,
        nFolds=2,
        featureImportance=True,
        outFile=underBase('moby/trainPredictions.csv'),
    )
    # NOTE(review): this call spells the keyword 'outfile' while validate()
    # uses 'outFile'; left as written -- confirm against Classify.testAndOutput.
    test.testAndOutput(
        clf=clf,
        testFile=underBase('workspace/testMetrics.csv'),
        orderFile=useIfExists(underBase('moby/testCorr32.csv')),
        outfile=underBase('moby/testPredictions.sub'),  # NOTE .sub, not .csv
    )
def main(maxFeatures=30, maxDepth=8):
    """Train, validate, and apply a gradient-boosting classifier.

    The classifier is configured from a fixed parameter dictionary in which
    only ``max_features`` and ``max_depth`` come from the arguments. It is
    validated with 2 folds on the training metrics and then used to write
    predictions for the test metrics. Every file path is resolved relative
    to ``globalConst.BASE_DIR``.

    Args:
        maxFeatures: Forwarded to the classifier as ``max_features``.
        maxDepth: Forwarded to the classifier as ``max_depth``.
    """
    # Imported locally so the block does not rely on a file-level import.
    import os

    # print(...) with one pre-formatted argument behaves identically on
    # Python 2 and Python 3, unlike the Python 2-only print statement.
    print("maxFeatures: %s" % (maxFeatures,))
    print("maxDepth : %s" % (maxDepth,))

    baseDir = globalConst.BASE_DIR

    def resolve(relPath):
        # Joining (rather than concatenating) gives a well-formed path with or
        # without a trailing slash on BASE_DIR. relPath must not start with a
        # slash, or os.path.join would drop baseDir entirely.
        return os.path.join(baseDir, relPath)

    params = {
        "max_depth": maxDepth,
        "subsample": 0.5,
        "verbose": 2,
        "random_state": 0,
        "min_samples_split": 20,
        "min_samples_leaf": 20,
        "max_features": maxFeatures,
        "n_estimators": 500,
        "learning_rate": 0.05,
    }
    # Alternative setting left by the author:
    # 'n_estimators': 12000, 'learning_rate': 0.002
    clf = GradientBoostingClassifier(**params)

    # NOTE: first pass, no orderFile; 2nd pass, use orderfiles.
    # useIfExists presumably returns the path only if the file exists --
    # TODO confirm against its definition.
    test = Classify(
        trainFile=resolve("workspace/trainMetrics.csv"),
        orderFile=useIfExists(resolve("moby/corr32.csv")),
    )
    test.validate(
        clf=clf,
        nFolds=2,
        featureImportance=True,
        outFile=resolve("moby/trainPredictions.csv"),
    )
    # NOTE(review): keyword here is 'outfile', whereas validate() takes
    # 'outFile'; kept as written -- verify against Classify.testAndOutput.
    test.testAndOutput(
        clf=clf,
        testFile=resolve("workspace/testMetrics.csv"),
        orderFile=useIfExists(resolve("moby/testCorr32.csv")),
        outfile=resolve("moby/testPredictions.sub"),  # NOTE .sub, not .csv
    )
def main():
    """Fit a gradient-boosting classifier and run a 2-fold validation.

    Uses a fixed parameter set and reads the training metrics from a
    hard-coded base directory. Feature importance is requested from the
    validation step.
    """
    baseDir = '/Users/nkridler/Desktop/whale/'
    params = {
        'max_depth': 8,
        'subsample': 0.5,
        'verbose': 2,
        # The comma after this entry is required; without it the dict
        # literal does not parse.
        'random_state': 0,
        'min_samples_split': 20,
        'min_samples_leaf': 20,
        'max_features': 30,
        'n_estimators': 500,
        'learning_rate': 0.05,
    }
    # Alternative setting left by the author:
    # 'n_estimators': 12000, 'learning_rate': 0.002
    clf = GradientBoostingClassifier(**params)
    test = Classify(baseDir + 'workspace/trainMetrics.csv')
    test.validate(clf, nFolds=2, featureImportance=True)