def compare_accuracy(x, y, n_estimators=11, bootstrap=True, slop=0.98, n_repeat=10):
    """Compare test accuracy of the module's forest against scikit-learn's.

    The first half of ``(x, y)`` is used for training and the second half
    for scoring.  Both classifiers are re-fit and scored ``n_repeat`` times
    and the per-round scores are summed.

    Args:
        x: Feature data; must expose ``.shape`` and support slicing
            (presumably a NumPy array -- confirm against callers).
        y: Labels, sliced with the same split point as ``x``.
        n_estimators: Forwarded to both classifiers.
        bootstrap: Forwarded to both classifiers.
        slop: Fraction of scikit-learn's summed score that the summed
            score of ``RandomForestClassifier`` must reach.
        n_repeat: Number of fit/score rounds.

    Raises:
        AssertionError: If the summed score of ``RandomForestClassifier``
            is below ``slop`` times scikit-learn's summed score.
    """
    # Local import: sklearn is only needed when this helper is called.
    import sklearn.ensemble

    # Floor division keeps the split point an integer even under true
    # division (Python 3 or ``from __future__ import division``).
    n = x.shape[0] // 2
    xtrain, ytrain = x[:n], y[:n]
    xtest, ytest = x[n:], y[n:]

    cudarf = RandomForestClassifier(n_estimators=n_estimators,
                                    bootstrap=bootstrap)
    skrf = sklearn.ensemble.RandomForestClassifier(n_estimators=n_estimators,
                                                   bootstrap=bootstrap)

    cuda_score_total = 0
    sk_score_total = 0
    for i in range(n_repeat):
        cudarf.fit(xtrain, ytrain)
        skrf.fit(xtrain, ytrain)
        sk_score = skrf.score(xtest, ytest)
        cuda_score = cudarf.score(xtest, ytest)
        # Single-argument print calls produce the same text on Python 2
        # and Python 3.
        print("Iteration %s" % i)
        print("Sklearn score %s" % sk_score)
        print("CudaTree score %s" % cuda_score)
        sk_score_total += sk_score
        cuda_score_total += cuda_score

    # Totals are compared; the message reports per-round averages.
    assert cuda_score_total >= (sk_score_total * slop), (
        "Getting significantly worse test accuracy than sklearn: %s vs. %s"
        % (cuda_score_total / n_repeat, sk_score_total / n_repeat)
    )
def compare_accuracy(x, y, n_estimators=11, bootstrap=True, slop=0.98, n_repeat=10):
    """Check that the module's forest is not much less accurate than sklearn's.

    Trains on the first half of ``(x, y)`` and scores on the second half,
    repeating the fit/score cycle ``n_repeat`` times for both the
    module-level ``RandomForestClassifier`` and
    ``sklearn.ensemble.RandomForestClassifier``.

    Args:
        x: Feature data with a ``.shape`` attribute that supports slicing
            (presumably a NumPy array -- confirm against callers).
        y: Labels, split at the same index as ``x``.
        n_estimators: Passed to both classifiers.
        bootstrap: Passed to both classifiers.
        slop: Minimum acceptable ratio between the two summed scores.
        n_repeat: How many times each classifier is fit and scored.

    Raises:
        AssertionError: When the summed score of ``RandomForestClassifier``
            falls below ``slop`` times the summed scikit-learn score.
    """
    # Imported lazily so sklearn is only required by callers of this helper.
    import sklearn.ensemble

    # ``//`` guarantees an integer slice index regardless of the division
    # semantics in effect (plain ``/`` yields a float under true division).
    half = x.shape[0] // 2
    xtrain, xtest = x[:half], x[half:]
    ytrain, ytest = y[:half], y[half:]

    cudarf = RandomForestClassifier(n_estimators=n_estimators,
                                    bootstrap=bootstrap)
    skrf = sklearn.ensemble.RandomForestClassifier(n_estimators=n_estimators,
                                                   bootstrap=bootstrap)

    cuda_score_total = 0
    sk_score_total = 0
    for i in range(n_repeat):
        cudarf.fit(xtrain, ytrain)
        skrf.fit(xtrain, ytrain)
        sk_score = skrf.score(xtest, ytest)
        cuda_score = cudarf.score(xtest, ytest)
        # One formatted string per call keeps the output identical on
        # Python 2 and Python 3.
        print("Iteration %s" % i)
        print("Sklearn score %s" % sk_score)
        print("CudaTree score %s" % cuda_score)
        sk_score_total += sk_score
        cuda_score_total += cuda_score

    # The comparison uses the summed scores; averages are shown on failure.
    assert cuda_score_total >= (sk_score_total * slop), (
        "Getting significantly worse test accuracy than sklearn: %s vs. %s"
        % (cuda_score_total / n_repeat, sk_score_total / n_repeat)
    )
def test_digits_memorize():
    """Fit a non-bootstrapped forest and require zero training errors.

    Uses the module-level ``x``, ``y`` and ``n_estimators``.  The forest is
    built with half of ``n_estimators`` trees, then its predictions on the
    training data itself are compared with ``y`` via ``util.test_diff``.

    Raises:
        AssertionError: If any training sample is predicted incorrectly.
    """
    with timer("Cuda treelearn"):
        # Floor division keeps the tree count an integer under true division.
        forest = RandomForestClassifier(n_estimators=n_estimators // 2,
                                        bootstrap=False)
        forest.fit(x, y)

    with timer("Predict"):
        diff, total = util.test_diff(forest.predict(x), y)
        # Parenthesised single-argument print works on Python 2 and 3.
        print("%s (Wrong) / %s (Total). The error rate is %f."
              % (diff, total, diff / float(total)))

    assert diff == 0, "Didn't memorize, got %d wrong" % diff
def test_covtype_memorize():
    """Fit a non-bootstrapped forest and require zero training errors.

    Uses the module-level ``x`` and ``y``.  The forest is fit with
    ``bfs_threshold=500000`` and its predictions on the training data are
    compared with ``y`` via ``util.test_diff``.

    Raises:
        AssertionError: If any training sample is predicted incorrectly.
    """
    with timer("Cuda treelearn"):
        forest = RandomForestClassifier(bootstrap=False)
        forest.fit(x, y, bfs_threshold=500000)

    with timer("Predict"):
        diff, total = util.test_diff(forest.predict(x), y)
        # Parenthesised single-argument print works on Python 2 and 3.
        print("%s(Wrong)/%s(Total). The error rate is %f."
              % (diff, total, diff / float(total)))

    assert diff == 0, "Didn't perfectly memorize, got %d wrong" % diff
def test_covtype_memorize():
    """Train without bootstrapping and check the training set is reproduced.

    Reads the module-level ``x`` and ``y``, fits a forest on them with
    ``bfs_threshold=500000``, predicts on the same ``x`` and fails if
    ``util.test_diff`` reports any mismatches against ``y``.
    """
    with timer("Cuda treelearn"):
        forest = RandomForestClassifier(bootstrap=False)
        forest.fit(x, y, bfs_threshold=500000)

    with timer("Predict"):
        predicted = forest.predict(x)
        n_wrong, n_total = util.test_diff(predicted, y)
        error_rate = n_wrong / float(n_total)
        report = "%s(Wrong)/%s(Total). The error rate is %f." % (
            n_wrong, n_total, error_rate)
        print(report)

    assert n_wrong == 0, "Didn't perfectly memorize, got %d wrong" % n_wrong
def benchmark_cuda(dataset, bfs_threshold=None):
    """Time fitting a forest on ``dataset`` after a warm-up fit.

    Args:
        dataset: Identifier handed to ``load_data``; also shown in the
            timer label.
        bfs_threshold: Forwarded unchanged to every ``fit`` call.
    """
    x_train, y_train = load_data(dataset)

    # Warm-up: per the original note, this single-tree forest exists only
    # to get the code compiled before the timed run.
    warmup_forest = RandomForestClassifier(
        n_estimators=1,
        bootstrap=bootstrap,
        verbose=False,
        max_features=None,
        debug=debug,
    )
    warmup_forest.fit(x_train, y_train, bfs_threshold=bfs_threshold)

    label = "%s benchmark cuda (bfs_threshold = %s)" % (dataset, bfs_threshold)
    with timer(label):
        forest = RandomForestClassifier(
            n_estimators=n_estimators,
            bootstrap=bootstrap,
            verbose=verbose,
            max_features=None,
            debug=debug,
        )
        forest.fit(x_train, y_train, bfs_threshold=bfs_threshold)
    # Drop the reference to the fitted forest once timing has finished.
    forest = None
def benchmark_cuda(dataset, bfs_threshold=None):
    """Benchmark a forest fit on ``dataset``, preceded by an untimed warm-up.

    Args:
        dataset: Passed to ``load_data`` and included in the timer label.
        bfs_threshold: Passed through to each ``fit`` call as-is.
    """
    x_train, y_train = load_data(dataset)
    fit_options = {"bfs_threshold": bfs_threshold}

    # Untimed single-tree fit; the original note says it is only there to
    # compile the code.
    throw_away = RandomForestClassifier(n_estimators=1,
                                        bootstrap=bootstrap,
                                        verbose=False,
                                        max_features=None,
                                        debug=debug)
    throw_away.fit(x_train, y_train, **fit_options)

    with timer("%s benchmark cuda (bfs_threshold = %s)"
               % (dataset, bfs_threshold)):
        forest = RandomForestClassifier(n_estimators=n_estimators,
                                        bootstrap=bootstrap,
                                        verbose=verbose,
                                        max_features=None,
                                        debug=debug)
        forest.fit(x_train, y_train, **fit_options)
    # Release the reference to the benchmarked forest.
    forest = None