def test_boston(self):
    """Compare the regressor under test with scikit-learn's decision tree.

    Both models are fitted and scored on the same five random train/test
    splits (33% held out) of the dataset returned by ``load_boston()``.
    The test passes when the relative difference between the two mean
    test MSEs is below 20%.
    """
    # Imported locally under an alias so it cannot clash with the
    # DecisionTreeRegressor being tested.
    from sklearn.tree import DecisionTreeRegressor as DecisionTreeRegressorSklearn

    model = DecisionTreeRegressor(max_n_splits=3)
    model_sklearn = DecisionTreeRegressorSklearn()
    dataset = load_boston()

    mse = []
    mse_sklearn = []
    for _ in range(5):
        # NOTE(review): the split is unseeded, so scores vary between runs.
        X_train, X_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, test_size=0.33)

        # mean_squared_error is documented as (y_true, y_pred); the value is
        # the same either way, but keep the conventional order.
        model.fit(X_train, y_train)
        mse.append(mean_squared_error(y_test, model.predict(X_test)))

        model_sklearn.fit(X_train, y_train)
        mse_sklearn.append(
            mean_squared_error(y_test, model_sklearn.predict(X_test)))

    mean_mse = np.mean(mse)
    mean_mse_sklearn = np.mean(mse_sklearn)
    print(mean_mse, mean_mse_sklearn)

    # Check that our model differs in MSE no worse than 20%
    relative_gap = np.abs(mean_mse - mean_mse_sklearn) / mean_mse_sklearn
    self.assertLess(
        relative_gap, 0.2,
        msg="mean MSE %.4f vs sklearn %.4f (relative gap %.3f)"
            % (mean_mse, mean_mse_sklearn, relative_gap))
def test_boston(self):
    """Compare the oblivious-tree regressor with scikit-learn's decision tree.

    The regressor under test is built with ``tree_type='oblivious'`` and
    ``max_n_splits=3``. Both models are fitted and scored on the same five
    random train/test splits (33% held out) of the dataset returned by
    ``load_boston()``. The test passes when the relative difference between
    the two mean test MSEs is below 50%.
    """
    # Local, aliased import: keeps scikit-learn's class distinct from the
    # DecisionTreeRegressor being tested.
    from sklearn.tree import DecisionTreeRegressor as DecisionTreeRegressorSklearn

    model = DecisionTreeRegressor(tree_type='oblivious', max_n_splits=3)
    model_sklearn = DecisionTreeRegressorSklearn()
    dataset = load_boston()

    mse = []
    mse_sklearn = []
    for _ in range(5):
        # NOTE(review): the split is unseeded, so scores vary between runs.
        X_train, X_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, test_size=0.33)

        # Arguments follow the documented (y_true, y_pred) order; MSE is
        # symmetric, so the value matches the previous call order.
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        mse.append(mean_squared_error(y_test, y_pred))

        model_sklearn.fit(X_train, y_train)
        y_pred_sklearn = model_sklearn.predict(X_test)
        mse_sklearn.append(mean_squared_error(y_test, y_pred_sklearn))

    mean_mse = np.mean(mse)
    mean_mse_sklearn = np.mean(mse_sklearn)
    print(mean_mse, mean_mse_sklearn)

    # Check that our model differs in MSE no worse than 50%
    relative_gap = np.abs(mean_mse - mean_mse_sklearn) / mean_mse_sklearn
    self.assertLess(
        relative_gap, 0.5,
        msg="mean MSE %.4f vs sklearn %.4f (relative gap %.3f)"
            % (mean_mse, mean_mse_sklearn, relative_gap))
# Benchmark script: fits an oblivious DecisionTreeRegressor on four random
# train/test splits of the load_boston() dataset and prints the mean test MSE.
import numpy as np
from sklearn.datasets import load_boston
from sklearn.metrics import mean_squared_error

try:
    # train_test_split lives here from scikit-learn 0.18 onwards.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Legacy location, removed in scikit-learn 0.20.
    from sklearn.cross_validation import train_test_split

from pines.estimators import DecisionTreeRegressor
from pines.tree_builders import TreeType

if __name__ == '__main__':
    model = DecisionTreeRegressor(
        max_n_splits=10, max_depth=4, tree_type=TreeType.OBLIVIOUS)
    # NOTE(review): load_boston was removed in scikit-learn 1.2; running this
    # on a newer release needs a different dataset loader.
    dataset = load_boston()

    mse = []
    for _ in range(4):
        # Unseeded split: each run draws fresh folds, so the output varies.
        X_train, X_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, test_size=0.33)
        model.fit(X_train, y_train)
        # print(model._tree)  # uncomment to inspect the fitted tree
        y_pred = model.predict(X_test)
        # Documented argument order is (y_true, y_pred).
        mse.append(mean_squared_error(y_test, y_pred))

    mean_mse = np.mean(mse)
    print(mean_mse)
# Benchmark script: mean test MSE of an oblivious DecisionTreeRegressor over
# four random train/test splits of the load_boston() dataset.
import numpy as np
from sklearn.datasets import load_boston
from sklearn.metrics import mean_squared_error

try:
    # Location of train_test_split since scikit-learn 0.18.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Fallback for older releases; this module was removed in 0.20.
    from sklearn.cross_validation import train_test_split

from pines.estimators import DecisionTreeRegressor
from pines.tree_builders import TreeType


def main():
    """Fit the model on four random splits and print the mean test MSE."""
    model = DecisionTreeRegressor(
        max_n_splits=10, max_depth=4, tree_type=TreeType.OBLIVIOUS)
    # NOTE(review): load_boston was removed in scikit-learn 1.2; running this
    # on a newer release needs a different dataset loader.
    dataset = load_boston()

    mse = []
    for _ in range(4):
        # The split is unseeded, so results differ from run to run.
        X_train, X_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, test_size=0.33)
        model.fit(X_train, y_train)
        # print(model._tree)  # uncomment to inspect the fitted tree
        # Documented argument order is (y_true, y_pred).
        mse.append(mean_squared_error(y_test, model.predict(X_test)))

    print(np.mean(mse))


if __name__ == '__main__':
    main()