import numpy
from sklearn import datasets, metrics, model_selection

import emtrees


def test_basic_binary_classification():
    X, Y = datasets.make_classification(n_classes=2, n_samples=1000)
    trees = emtrees.RandomForest(n_estimators=10, max_depth=10)
    X = (X * 2**16).astype(int)  # convert floats to fixed-point integers
    scores = model_selection.cross_val_score(trees, X, Y, scoring='accuracy')
    assert numpy.mean(scores) > 0.7, scores
def test_trees_to_dot():
    X, Y = datasets.make_classification(n_classes=2, n_samples=10)
    trees = emtrees.RandomForest(n_estimators=3, max_depth=5)
    X = (X * 2**16).astype(int)  # convert floats to fixed-point integers
    trees.fit(X, Y)
    dot = trees.to_dot(name='ffoo')
    with open('tmp/trees.dot', 'w') as f:
        f.write(dot)
def test_binary_classification_compiled():
    X, Y = datasets.make_classification(n_classes=2)
    trees = emtrees.RandomForest(n_estimators=3, max_depth=5)
    X = (X * 2**16).astype(int)  # convert floats to fixed-point integers
    trees.fit(X, Y)
    # build_classifier/run_classifier are test helpers (defined in this test
    # module) that compile the generated C code and run predictions through it
    p = build_classifier(trees)
    predicted = run_classifier(p, X)
    accuracy = metrics.accuracy_score(Y, predicted)
    assert accuracy > 0.9  # testing on training data
def test_inline_compiled():
    X, Y = datasets.make_classification(n_classes=2, random_state=1)
    trees = emtrees.RandomForest(n_estimators=3, max_depth=5, random_state=1)
    X = (X * 2**16).astype(int)  # convert floats to fixed-point integers
    trees.fit(X, Y)
    p = build_classifier(trees, 'myinline', func='myinline_predict(values, length)')
    predicted = run_classifier(p, X)
    accuracy = metrics.accuracy_score(Y, predicted)
    assert accuracy > 0.9  # testing on training data
import numpy
from sklearn import datasets, metrics
from sklearn.model_selection import train_test_split

import emtrees

# rnd: random seed, defined earlier in the script

print('Loading digits dataset. 8x8=64 features')
digits = datasets.load_digits()
Xtrain, Xtest, ytrain, ytest = train_test_split(digits.data, digits.target, random_state=rnd)

# convert floats to fixed-point integers
Xtrain = (Xtrain * 2**16).astype(numpy.int32)
Xtest = (Xtest * 2**16).astype(numpy.int32)

# 0.95+ with n_estimators=40, max_depth=20
# 0.90+ with n_estimators=10, max_depth=10
trees = 40
max_depth = 20

print('Training {} trees with max_depth {}'.format(trees, max_depth))
model = emtrees.RandomForest(n_estimators=trees, max_depth=max_depth, random_state=rnd)
model.fit(Xtrain, ytrain)

# Predict on the held-out set
ypred = model.predict(Xtest)
print('Accuracy on validation set {:.2f}%'.format(
    metrics.accuracy_score(ytest, ypred) * 100))

m = numpy.max(Xtrain), numpy.min(Xtrain)

# Output C code
code = model.output_c('digits')
filename = 'digits.h'
with open(filename, 'w') as f:
    f.write(code)
print('Wrote C code to', filename)
import pandas
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import emtrees

# Load and prepare data
filename = 'https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data'
df = pandas.read_csv(filename, header=None)
target_columns = [60]
data_columns = [c for c in df.columns if c not in target_columns]

# Convert floats to fixed-point integers, and class labels to integer codes
df[data_columns] = (df[data_columns] * 2**16).astype(int)
df[target_columns] = df[target_columns[0]].astype('category').cat.codes

X_train, X_test, Y_train, Y_test = train_test_split(df[data_columns], df[target_columns])

# Run training
n_trees = 5
estimator = emtrees.RandomForest(n_estimators=n_trees, max_depth=10)
estimator.fit(X_train, Y_train)

# Evaluate on the held-out set
Y_pred = estimator.predict(X_test)
a = accuracy_score(Y_test, Y_pred)
print('Trees: %d' % n_trees)
print('Accuracy: %.3f%%' % (a * 100))

# Output C code
code = estimator.output_c('sonar')
outfile = 'sonar.h'
with open(outfile, 'w') as f:
    f.write(code)
print('Wrote C code to', outfile)
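/*
 * A minimal sketch (not part of the repo) of how the generated sonar.h might
 * be consumed from C on the target device. The function name
 * sonar_predict(values, length) is an assumption, inferred from the
 * '<name>_predict(values, length)' convention used in test_inline_compiled
 * above; the integer type and return value are also assumptions. Check the
 * generated header for the exact signature before using this.
 */
#include <stdio.h>
#include <stdint.h>

#include "sonar.h"

#define N_FEATURES 60  /* the sonar dataset has 60 feature columns */

int main(void)
{
    /* Features must use the same fixed-point scaling as during training:
     * floating-point value * 2^16, truncated to integer. */
    int32_t values[N_FEATURES];
    for (int i = 0; i < N_FEATURES; i++) {
        values[i] = (int32_t)(0.5 * (1 << 16));  /* placeholder feature values */
    }

    const int predicted_class = sonar_predict(values, N_FEATURES);
    printf("predicted class: %d\n", predicted_class);
    return 0;
}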