def benchmark_hybrid(dataset, bfs_threshold=None):
    """Time the fit of a hybrid forest (WiseRF on the CPU side) on ``dataset``.

    A one-tree forest is fitted first, outside the timed region, so that
    code compilation is not counted in the measurement. The timed forest
    is built from the globals ``n_estimators`` and ``bootstrap``.

    Args:
        dataset: Name handed to ``load_data``; also shown in the timer label.
        bfs_threshold: Forwarded unchanged to both ``fit`` calls.
    """
    features, labels = load_data(dataset)

    # Warm-up fit: only here to trigger compilation before timing starts.
    warmup = hybridForest(
        n_estimators=1,
        bootstrap=bootstrap,
        max_features=None,
        cpu_classifier=WiseRF,
    )
    warmup.fit(features, labels, bfs_threshold=bfs_threshold)

    label = "%s benchmark hybrid (bfs_threshold = %s)" % (dataset, bfs_threshold)
    with timer(label):
        forest = hybridForest(
            n_estimators=n_estimators,
            bootstrap=bootstrap,
            max_features=None,
            cpu_classifier=WiseRF,
        )
        forest.fit(features, labels, bfs_threshold=bfs_threshold)

    # Drop the reference to the fitted forest (presumably so its resources
    # can be reclaimed before the next benchmark -- confirm).
    forest = None
def benchmark_cuda(dataset, bfs_threshold=None):
    """Time the fit of a ``RandomForestClassifier`` on ``dataset``.

    A one-tree, non-verbose forest is fitted first so that code
    compilation happens before the timed section. The timed forest uses
    the globals ``n_estimators``, ``bootstrap``, ``verbose`` and ``debug``.

    Args:
        dataset: Name handed to ``load_data``; also shown in the timer label.
        bfs_threshold: Forwarded unchanged to both ``fit`` calls.
    """
    features, labels = load_data(dataset)

    # Settings shared by the warm-up forest and the timed forest.
    shared = dict(bootstrap=bootstrap, max_features=None, debug=debug)

    # Warm-up fit: only here to trigger compilation before timing starts.
    warmup = RandomForestClassifier(n_estimators=1, verbose=False, **shared)
    warmup.fit(features, labels, bfs_threshold=bfs_threshold)

    label = "%s benchmark cuda (bfs_threshold = %s)" % (dataset, bfs_threshold)
    with timer(label):
        forest = RandomForestClassifier(
            n_estimators=n_estimators, verbose=verbose, **shared
        )
        forest.fit(features, labels, bfs_threshold=bfs_threshold)

    # Drop the reference to the fitted forest (presumably so its resources
    # can be reclaimed before the next benchmark -- confirm).
    forest = None
def benchmark_hybrid(dataset, bfs_threshold=None):
    """Time the fit of a two-job hybrid forest on ``dataset``.

    A two-tree forest is fitted first, outside the timed region, so that
    code compilation is not counted in the measurement. The timed forest
    is built with ``n_jobs=2`` and the globals ``n_estimators`` and
    ``bootstrap``.

    Args:
        dataset: Name handed to ``load_data``; also shown in the timer label.
        bfs_threshold: Forwarded unchanged to both ``fit`` calls.
    """
    features, labels = load_data(dataset)
    fit_options = {"bfs_threshold": bfs_threshold}

    # Warm-up fit: only here to trigger compilation before timing starts.
    warmup = hybridForest(n_estimators=2, bootstrap=bootstrap, max_features=None)
    warmup.fit(features, labels, **fit_options)

    with timer(
        "%s benchmark hybrid (bfs_threshold = %s)" % (dataset, bfs_threshold)
    ):
        forest = hybridForest(
            n_estimators=n_estimators,
            bootstrap=bootstrap,
            n_jobs=2,
            max_features=None,
        )
        forest.fit(features, labels, **fit_options)

    # Drop the reference to the fitted forest (presumably so its resources
    # can be reclaimed before the next benchmark -- confirm).
    forest = None
def benchmark_cuda(dataset, bfs_threshold=None):
    """Time the fit of a ``RandomForestClassifier`` on ``dataset``.

    The measurement is preceded by an untimed fit of a one-tree,
    non-verbose forest whose only job is to get the code compiled. The
    timed forest uses the globals ``n_estimators``, ``bootstrap``,
    ``verbose`` and ``debug``.

    Args:
        dataset: Name handed to ``load_data``; also shown in the timer label.
        bfs_threshold: Forwarded unchanged to both ``fit`` calls.
    """
    train_x, train_y = load_data(dataset)

    # Warm-up fit: only here to trigger compilation before timing starts.
    compile_only = RandomForestClassifier(
        n_estimators=1,
        bootstrap=bootstrap,
        verbose=False,
        max_features=None,
        debug=debug,
    )
    compile_only.fit(train_x, train_y, bfs_threshold=bfs_threshold)

    timer_label = "%s benchmark cuda (bfs_threshold = %s)" % (
        dataset,
        bfs_threshold,
    )
    with timer(timer_label):
        forest = RandomForestClassifier(
            n_estimators=n_estimators,
            bootstrap=bootstrap,
            verbose=verbose,
            max_features=None,
            debug=debug,
        )
        forest.fit(train_x, train_y, bfs_threshold=bfs_threshold)

    # Drop the reference to the fitted forest (presumably so its resources
    # can be reclaimed before the next benchmark -- confirm).
    forest = None
import numpy as np

from cudatree import load_data, RandomForestClassifier, timer
from cudatree import util
from helpers import compare_accuracy, compare_hybrid_accuracy

# Only the first 10000 covtype rows are used by the tests in this module.
x, y = load_data("covtype")
x = x[:10000]
y = y[:10000]


def test_covtype_memorize():
    """Check that a non-bootstrapped forest reproduces its training labels.

    Fits a ``RandomForestClassifier(bootstrap=False)`` on ``x``/``y`` with
    ``bfs_threshold=500000``, predicts on the same ``x`` and asserts that
    ``util.test_diff`` reports zero mismatches.
    """
    with timer("Cuda treelearn"):
        forest = RandomForestClassifier(bootstrap=False)
        forest.fit(x, y, bfs_threshold=500000)
    with timer("Predict"):
        diff, total = util.test_diff(forest.predict(x), y)
        # Parenthesised single-argument form: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("%s(Wrong)/%s(Total). The error rate is %f." % (
            diff, total, diff / float(total)))
    assert diff == 0, "Didn't perfectly memorize, got %d wrong" % diff


def test_covtype_accuracy():
    """Run both accuracy comparison helpers on the covtype subset."""
    compare_accuracy(x, y)
    compare_hybrid_accuracy(x, y)


if __name__ == "__main__":
    test_covtype_memorize()
    test_covtype_accuracy()
import numpy as np

from cudatree import load_data, RandomForestClassifier, timer
from cudatree import util
from .helpers import compare_accuracy

x, y = load_data("digits")

n_estimators = 13
bootstrap = True


def test_digits_memorize():
    """Check that a non-bootstrapped forest reproduces its training labels.

    Fits a ``RandomForestClassifier`` with half of ``n_estimators`` trees
    (rounded down) and ``bootstrap=False`` on ``x``/``y``, predicts on the
    same ``x`` and asserts that ``util.test_diff`` reports zero mismatches.
    """
    with timer("Cuda treelearn"):
        # Floor division keeps the tree count an int: under Python 3 true
        # division ``13 / 2`` would be the float 6.5.
        forest = RandomForestClassifier(n_estimators=n_estimators // 2,
                                        bootstrap=False)
        forest.fit(x, y)
    with timer("Predict"):
        diff, total = util.test_diff(forest.predict(x), y)
        print("%s (Wrong) / %s (Total). The error rate is %f." %
              (diff, total, diff / float(total)))
    assert diff == 0, "Didn't memorize, got %d wrong" % diff


def test_digits_vs_sklearn():
    """Run the ``compare_accuracy`` helper on the digits data."""
    compare_accuracy(x, y)


if __name__ == "__main__":
    test_digits_memorize()
import numpy as np

from cudatree import load_data, RandomForestClassifier, timer
from cudatree import util
from helpers import compare_accuracy

x, y = load_data("digits")

n_estimators = 13
bootstrap = True


def test_digits_memorize():
    """Check that a non-bootstrapped forest reproduces its training labels.

    Fits a ``RandomForestClassifier`` with half of ``n_estimators`` trees
    (rounded down) and ``bootstrap=False`` on ``x``/``y``, predicts on the
    same ``x`` and asserts that ``util.test_diff`` reports zero mismatches.
    """
    with timer("Cuda treelearn"):
        # Floor division gives the same int as Python 2's ``13/2`` and stays
        # an int under Python 3, where ``/`` would produce 6.5.
        forest = RandomForestClassifier(n_estimators=n_estimators // 2,
                                        bootstrap=False)
        forest.fit(x, y)
    with timer("Predict"):
        diff, total = util.test_diff(forest.predict(x), y)
        # Parenthesised single-argument form: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("%s (Wrong) / %s (Total). The error rate is %f." %
              (diff, total, diff / float(total)))
    assert diff == 0, "Didn't memorize, got %d wrong" % diff


def test_digits_vs_sklearn():
    """Run the ``compare_accuracy`` helper on the digits data."""
    compare_accuracy(x, y)


if __name__ == "__main__":
    test_digits_memorize()
    test_digits_vs_sklearn()
import numpy as np

from cudatree import load_data, RandomForestClassifier, timer
from cudatree import util
from helpers import compare_accuracy, compare_hybrid_accuracy

x, y = load_data("diabetes")


def test_diabetes_memorize():
    """Check that a non-bootstrapped forest reproduces its training labels.

    Fits a ``RandomForestClassifier(bootstrap=False)`` on ``x``/``y``,
    predicts on the same ``x`` and asserts that ``util.test_diff`` reports
    zero mismatches.
    """
    with timer("Cuda treelearn"):
        forest = RandomForestClassifier(bootstrap=False)
        forest.fit(x, y)
    with timer("Predict"):
        diff, total = util.test_diff(forest.predict(x), y)
        # Parenthesised single-argument form: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("%s(Wrong)/%s(Total). The error rate is %f." % (
            diff, total, diff / float(total)))
    assert diff == 0, "Didn't perfectly memorize, got %d wrong" % diff


def test_diabetes_accuracy():
    """Run both accuracy comparison helpers on the diabetes data."""
    compare_accuracy(x, y)
    compare_hybrid_accuracy(x, y)


if __name__ == "__main__":
    test_diabetes_memorize()
    test_diabetes_accuracy()
import numpy as np

from cudatree import load_data, RandomForestClassifier, timer
from cudatree import util
from .helpers import compare_accuracy

x, y = load_data("iris")


def test_iris_memorize():
    """Verify that a forest trained without bootstrapping memorizes iris.

    The forest is fitted on ``x``/``y`` and then asked to predict ``x``
    again; the mismatch count from ``util.test_diff`` must be zero.
    """
    with timer("Cuda treelearn"):
        classifier = RandomForestClassifier(bootstrap=False)
        classifier.fit(x, y)

    with timer("Predict"):
        predicted = classifier.predict(x)
        wrong, count = util.test_diff(predicted, y)
        error_rate = wrong / float(count)
        print("%s(Wrong)/%s(Total). The error rate is %f."
              % (wrong, count, error_rate))

    assert wrong == 0, "Didn't perfectly memorize, got %d wrong" % wrong


def test_iris_accuracy():
    """Run the ``compare_accuracy`` helper on the iris data."""
    compare_accuracy(x, y)


if __name__ == "__main__":
    test_iris_memorize()
    test_iris_accuracy()
import numpy as np

from cudatree import load_data, RandomForestClassifier, timer
from cudatree import util
from helpers import compare_accuracy, compare_hybrid_accuracy

x, y = load_data("diabetes")


def test_diabetes_memorize():
    """Fit on the diabetes data and require a perfect fit of the training set.

    The forest is built with ``bootstrap=False``; predictions for ``x`` are
    compared against ``y`` via ``util.test_diff`` and the reported number
    of mismatches must be zero.
    """
    with timer("Cuda treelearn"):
        forest = RandomForestClassifier(bootstrap=False)
        forest.fit(x, y)
    with timer("Predict"):
        diff, total = util.test_diff(forest.predict(x), y)
        # Written as a call with one argument so the line is valid on
        # Python 3 and still prints identically on Python 2.
        print("%s(Wrong)/%s(Total). The error rate is %f."
              % (diff, total, diff / float(total)))
    assert diff == 0, "Didn't perfectly memorize, got %d wrong" % diff


def test_diabetes_accuracy():
    """Run ``compare_accuracy`` and ``compare_hybrid_accuracy`` on the data."""
    compare_accuracy(x, y)
    compare_hybrid_accuracy(x, y)


if __name__ == "__main__":
    test_diabetes_memorize()
    test_diabetes_accuracy()
import numpy as np

from cudatree import load_data, RandomForestClassifier, timer
from cudatree import util
from helpers import compare_accuracy, compare_hybrid_accuracy

x, y = load_data("covtype")
# Keep only the first 10000 rows for the tests below.
x = x[:10000]
y = y[:10000]


def test_covtype_memorize():
    """Fit on the covtype subset and require a perfect fit of the training set.

    The forest is built with ``bootstrap=False`` and fitted with
    ``bfs_threshold=500000``; predictions for ``x`` are compared against
    ``y`` via ``util.test_diff`` and the reported number of mismatches
    must be zero.
    """
    with timer("Cuda treelearn"):
        forest = RandomForestClassifier(bootstrap=False)
        forest.fit(x, y, bfs_threshold=500000)
    with timer("Predict"):
        diff, total = util.test_diff(forest.predict(x), y)
        # Written as a call with one argument so the line is valid on
        # Python 3 and still prints identically on Python 2.
        print("%s(Wrong)/%s(Total). The error rate is %f."
              % (diff, total, diff / float(total)))
    assert diff == 0, "Didn't perfectly memorize, got %d wrong" % diff


def test_covtype_accuracy():
    """Run ``compare_accuracy`` and ``compare_hybrid_accuracy`` on the subset."""
    compare_accuracy(x, y)
    compare_hybrid_accuracy(x, y)


if __name__ == "__main__":
    test_covtype_memorize()
    test_covtype_accuracy()
import numpy as np

from cudatree import load_data, RandomForestClassifier, timer
from cudatree import util
from helpers import compare_accuracy

x, y = load_data("iris")


def test_iris_memorize():
    """Fit on the iris data and require a perfect fit of the training set.

    The forest is built with ``bootstrap=False``; predictions for ``x`` are
    compared against ``y`` via ``util.test_diff`` and the reported number
    of mismatches must be zero.
    """
    with timer("Cuda treelearn"):
        forest = RandomForestClassifier(bootstrap=False)
        forest.fit(x, y)
    with timer("Predict"):
        diff, total = util.test_diff(forest.predict(x), y)
        # Written as a call with one argument so the line is valid on
        # Python 3 and still prints identically on Python 2.
        print("%s(Wrong)/%s(Total). The error rate is %f."
              % (diff, total, diff / float(total)))
    assert diff == 0, "Didn't perfectly memorize, got %d wrong" % diff


def test_iris_accuracy():
    """Run the ``compare_accuracy`` helper on the iris data."""
    compare_accuracy(x, y)


if __name__ == "__main__":
    test_iris_memorize()
    test_iris_accuracy()