"""Train a small arboretum regression model on the Boston housing data.

The script builds a ``DMatrix`` from the data set, grows five trees with
``arboretum.Garden`` and prints the first ten predictions made on the
training data.
"""
import arboretum
import numpy as np
from sklearn.datasets import load_boston
import xgboost


def rmse(y, y_hat):
    """Return the root mean squared error between ``y`` and ``y_hat``.

    Args:
        y: reference values (array-like).
        y_hat: predicted values (array-like), broadcastable against ``y``.

    Returns:
        The square root of the mean of the squared differences.
    """
    residual = np.asarray(y, dtype=float) - np.asarray(y_hat, dtype=float)
    # Average the squared residuals: the square root of their plain sum
    # grows with the number of samples and is not an RMSE.
    return np.sqrt(np.mean(np.square(residual)))


# load test data
# NOTE: load_boston was removed from scikit-learn in release 1.2, so this
# script needs an older scikit-learn to run.
boston = load_boston()

# Upper slice bound; a bound beyond the number of rows keeps every row.
n = 10000
y = boston.target[0:n]

# create data matrix
data = arboretum.DMatrix(boston.data[0:n], y=y)

# init model
model = arboretum.Garden('reg:linear', data, 6, 2, 1, 0.5)

# grow trees
for _ in range(5):
    model.grow_tree()

# predict on train data set
pred = model.predict(data)

# print the first 10 predictions
print(pred[0:10])
import arboretum import numpy as np from sklearn.datasets import load_iris import xgboost import json # load test data iris = load_iris() n = 10000 index = iris.target != 2 y = iris.target[index][0:n] # create data matrix data = arboretum.DMatrix(iris.data[index, 0:n], y=y) config = json.dumps({ 'objective': 1, 'verbose': { 'gpu': True }, 'tree': { 'eta': 0.2, 'max_depth': 6, 'gamma': 0.0, 'min_child_weight': 2.0, 'min_leaf_size': 0, 'colsample_bytree': 1.0, 'colsample_bylevel': 1.0, 'lambda': 0.0, 'alpha': 0.0
'tree': { 'eta': 0.01, 'max_depth': 10, 'gamma': 0.0, 'min_child_weight': 20.0, 'min_leaf_size': 0, 'colsample_bytree': 0.6, 'colsample_bylevel': 0.6, 'lambda': 0.1, 'gamma_relative': 0.0001 }}) print(config) data = arboretum.DMatrix(data, data_category=data_categoties, y=labels) data_val = arboretum.DMatrix(data_val, data_category=data_categoties_val) model = arboretum.Garden(config, data) print('training...') # grow trees for i in range(7400): print('tree', i) model.grow_tree() model.append_last_tree(data_val) if i % 20 == 0: pred = model.get_y(data) print('train', sklearn.metrics.log_loss(labels, pred, eps=1e-6), roc_auc_score(labels, pred))
# load test data boston = load_boston() n = 4000 categoties = [3] data_categories = [] for item in categoties: data_categories.append(convert2category(boston.data[:, item])) data_categories = np.stack(data_categories, axis=-1) data_source = boston.data[:, 4:5] # create data matrix data = arboretum.DMatrix(data_source[0:n], data_category=data_categories, y=boston.target[0:n]) y = boston.target[0:n] config = json.dumps({ 'objective': 0, 'internals': { 'double_precision': True, 'compute_overlap': 2 }, 'verbose': { 'gpu': True }, 'tree': { 'eta': 0.5, 'max_depth': 10,
import arboretum import numpy as np from sklearn.datasets import load_iris import xgboost import json # load test data iris = load_iris() n = 10000 y = iris.target[0:n] # create data matrix data = arboretum.DMatrix(iris.data[:, 0:n], labels=y) config = json.dumps({ 'objective': 3, 'verbose': { 'gpu': True, 'booster': True }, 'tree': { 'labels_count': 3, 'eta': 0.2, 'max_depth': 6, 'gamma': 0.0, 'min_child_weight': 2.0, 'min_leaf_size': 0, 'colsample_bytree': 1.0, 'colsample_bylevel': 1.0, 'lambda': 0.0, 'alpha': 0.0,