def setUp(self):
    """Create the layer fixtures used by the tests in this case.

    Builds a multi-grain scan layer over two windows, a max-pooling layer,
    a concat layer, a cascade layer and an auto-growing cascade layer, and
    stores each of them on the test instance.
    """
    # Two scan windows; every window is served by the same two forest configs.
    windows = [Window(7, 7, 2, 2, 0, 0), Window(11, 11, 2, 2, 0, 0)]
    rf1 = ExtraRandomForestConfig(n_estimators=40, min_samples_leaf=10)
    rf2 = RandomForestConfig(n_estimators=40, min_samples_leaf=10)
    est_for_windows = [[rf1, rf2] for _ in windows]
    self.mgs = MultiGrainScanLayer(
        windows=windows,
        est_for_windows=est_for_windows,
        n_class=10,
    )

    # A 2 x 2 grid of independent max-pooling instances.
    pools = [[MaxPooling(), MaxPooling()] for _ in range(2)]
    self.poolayer = PoolingLayer(pools=pools)
    self.concat_layer = ConcatLayer()

    # Two extra-random forests followed by two random forests, 40 trees each.
    self.est_configs = [
        config_cls(n_estimators=40)
        for config_cls in (ExtraRandomForestConfig, RandomForestConfig)
        for _ in range(2)
    ]

    cascade_dir = osp.join(get_data_save_base(), 'test_layer', 'cascade')
    self.cascade = CascadeLayer(
        est_configs=self.est_configs,
        n_classes=10,
        keep_in_mem=True,
        data_save_dir=cascade_dir,
    )

    auto_cascade_dir = osp.join(get_data_save_base(), 'test_layer', 'auto_cascade')
    self.auto_cascade = AutoGrowingCascadeLayer(
        est_configs=self.est_configs,
        early_stopping_rounds=3,
        data_save_rounds=4,
        stop_by_test=True,
        n_classes=10,
        data_save_dir=auto_cascade_dir,
    )
def test_determine_split_uniform(self, dis_level=3):
    """Call ``determine_split`` on four 500-tree forest configs and print the result.

    The estimator arguments come from two ``RandomForestConfig`` objects
    followed by two ``ExtraRandomForestConfig`` objects.

    NOTE(review): ``dis_level`` is accepted but never used; the call below
    passes the literals ``50`` and ``3``. Confirm whether that is intended.
    """
    config_classes = (
        RandomForestConfig,
        RandomForestConfig,
        ExtraRandomForestConfig,
        ExtraRandomForestConfig,
    )
    ests = [config_cls(n_estimators=500).get_est_args() for config_cls in config_classes]
    outcome = determine_split(50, 3, ests)
    should_split, split_scheme = outcome
    print(should_split, split_scheme)
def determine_split3(self, dis_level=3):
    """Check the split scheme ``determine_split`` yields for four 200-tree forests.

    For ``dis_level`` 3 every estimator is expected to split as ``[134, 66]``;
    for ``dis_level`` 2 as ``[100, 100]``. Any other ``dis_level`` only runs
    ``determine_split`` and asserts nothing.
    """
    config_classes = (
        RandomForestConfig,
        RandomForestConfig,
        ExtraRandomForestConfig,
        ExtraRandomForestConfig,
    )
    ests = [config_cls(n_estimators=200).get_est_args() for config_cls in config_classes]
    should_split, split_scheme = determine_split(dis_level, 3, ests)

    # Choose the expected per-estimator scheme first, then run shared asserts.
    if dis_level == 3:
        expected_scheme = [[134, 66] for _ in range(4)]
    elif dis_level == 2:
        expected_scheme = [[100, 100] for _ in range(4)]
    else:
        return

    assert should_split is True
    assert split_scheme == expected_scheme
def setUp(self):
    """Load a small MNIST subset and prepare estimator argument fixtures.

    Stores the first 120 training samples and the first 60 test samples,
    reshaped to ``(n, -1, 28, 28)``, together with their labels, on the test
    instance. Also stores ``self.estimators``: the estimator arguments of
    three default ``RandomForestConfig`` objects followed by three default
    ``ExtraRandomForestConfig`` objects.
    """
    # the data, shuffled and split between train and test sets
    train_part, test_part = mnist.load_data()
    x_train, y_train = train_part
    x_test, y_test = test_part

    # Training images are reshaped as a whole and then truncated; test images
    # are truncated first and then reshaped.
    self.x_train = np.reshape(x_train, (60000, -1, 28, 28))[:120, :, :, :]
    self.y_train = y_train[:120]
    self.x_test = np.reshape(x_test[:60], (60, -1, 28, 28))
    self.y_test = y_test[:60]

    self.estimators = [
        config_cls().get_est_args()
        for config_cls in (RandomForestConfig, ExtraRandomForestConfig)
        for _ in range(3)
    ]
def _init(self, distribute=False):
    """Build a cascade layer and an auto-growing cascade layer for two classes.

    Also stores the shared estimator configs (two extra-random forests then
    two random forests, 20 trees each) on ``self.est_configs``.

    :param distribute: forwarded unchanged to both layer constructors.
    :return: tuple ``(cascade_layer, auto_growing_cascade_layer)``.
    """
    self.est_configs = [
        config_cls(n_estimators=20)
        for config_cls in (ExtraRandomForestConfig, RandomForestConfig)
        for _ in range(2)
    ]

    cascade_dir = osp.join(get_data_save_base(), 'test_layer', 'cascade')
    cascade_layer = CascadeLayer(
        est_configs=self.est_configs,
        n_classes=2,
        data_save_dir=cascade_dir,
        distribute=distribute,
    )

    auto_cascade_dir = osp.join(get_data_save_base(), 'test_layer', 'auto_cascade')
    auto_cascade_layer = AutoGrowingCascadeLayer(
        est_configs=self.est_configs,
        early_stopping_rounds=2,
        stop_by_test=False,
        data_save_rounds=4,
        n_classes=2,
        data_save_dir=auto_cascade_dir,
        distribute=distribute,
    )
    return cascade_layer, auto_cascade_layer
def _init(self):
    """Build the four layers used by the graph tests.

    Stores the cascade estimator configs (two extra-random forests then two
    random forests, 40 trees each) on ``self.est_configs``.

    :return: tuple ``(mgs, poolayer, concat_layer, auto_cascade)``.
    """
    self.est_configs = [
        config_cls(n_estimators=40)
        for config_cls in (ExtraRandomForestConfig, RandomForestConfig)
        for _ in range(2)
    ]

    # Two scan windows; every window is served by the same two forest configs.
    windows = [Window(7, 7, 2, 2, 0, 0), Window(11, 11, 2, 2, 0, 0)]
    rf1 = ExtraRandomForestConfig(min_samples_leaf=10)
    rf2 = RandomForestConfig(min_samples_leaf=10)
    est_for_windows = [[rf1, rf2] for _ in windows]
    mgs = MultiGrainScanLayer(
        dtype=np.float32,
        windows=windows,
        est_for_windows=est_for_windows,
        n_class=10,
    )

    # A 2 x 2 grid of independent "max" pooling instances.
    pools = [[Pooling(2, 2, "max"), Pooling(2, 2, "max")] for _ in range(2)]
    poolayer = PoolingLayer(pools=pools)
    concat_layer = ConcatLayer()

    save_dir = osp.join(get_data_save_base(), 'test_graph', 'auto_cascade')
    auto_cascade = AutoGrowingCascadeLayer(
        est_configs=self.est_configs,
        early_stopping_rounds=2,
        stop_by_test=False,
        data_save_rounds=4,
        n_classes=10,
        data_save_dir=save_dir,
    )
    return mgs, poolayer, concat_layer, auto_cascade
(x_train, y_train), (x_test, y_test) = mnist.load_data() x_train = x_train.reshape(60000, -1, 28, 28) x_test = x_test.reshape(10000, -1, 28, 28) x_train = x_train[:200, :, :, :] x_test = x_test[:100, :, :, :] x_train = x_train / 255.0 x_test = x_test / 255.0 y_train = y_train[:200] y_test = y_test[:100] print(x_train.shape, 'train') print(x_test.shape, 'test') rf1 = ExtraRandomForestConfig(min_samples_leaf=10) rf2 = RandomForestConfig(min_samples_leaf=10) windows = [ Window(win_x=7, win_y=7, stride_x=2, stride_y=2, pad_x=0, pad_y=0), Window(10, 10, 2, 2), Window(13, 13, 2, 2) ] est_for_windows = [[rf1, rf2], [rf1, rf2], [rf1, rf2]] mgs = MultiGrainScanLayer(windows=windows, est_for_windows=est_for_windows, n_class=10) pools = [[MeanPooling(2, 2), MeanPooling(2, 2)],
from forestlayer.estimators.estimator_configs import ExtraRandomForestConfig, RandomForestConfig from forestlayer.layers.layer import AutoGrowingCascadeLayer from forestlayer.layers.graph import Graph from forestlayer.utils.storage_utils import get_data_save_base, get_model_save_base import os.path as osp import time (x_train, y_train, x_test, y_test) = uci_yeast.load_data() start_time = time.time() print('x_train shape: {}'.format(x_train.shape)) print('x_test.shape: {}'.format(x_test.shape)) est_configs = [ ExtraRandomForestConfig(), ExtraRandomForestConfig(), ExtraRandomForestConfig(), ExtraRandomForestConfig(), RandomForestConfig(), RandomForestConfig(), RandomForestConfig(), RandomForestConfig() ] data_save_dir = osp.join(get_data_save_base(), 'uci_yeast') model_save_dir = osp.join(get_model_save_base(), 'uci_yeast') auto_cascade = AutoGrowingCascadeLayer(est_configs=est_configs, early_stopping_rounds=4, n_classes=10,
# Copyright 2017 Authors NJU PASA BigData Laboratory.
# Authors: Qiu Hu <huqiu00#163.com>
# License: Apache-2.0

import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from forestlayer.estimators.estimator_configs import RandomForestConfig, ExtraRandomForestConfig
from forestlayer.layers.layer import AutoGrowingCascadeLayer

# Synthetic 1-D regression data: 100 points on [0, 6], two summed sinusoids
# plus Gaussian noise drawn from a seeded generator.
rng = np.random.RandomState(1)
X = np.linspace(0, 6, 100)[:, np.newaxis]
y = np.sin(X).ravel() + np.sin(6 * X).ravel() + rng.normal(0, 0.1, X.shape[0])

# Fit an auto-growing cascade of one random forest and one extra-random
# forest, then flatten its predictions to one dimension.
est_configs = [RandomForestConfig(), ExtraRandomForestConfig()]
cascade = AutoGrowingCascadeLayer(
    task='regression',
    est_configs=est_configs,
    early_stopping_rounds=3,
    keep_in_mem=True,
)
cascade.fit(X, y)
y1 = cascade.predict(X).reshape(-1)

# Fit AdaBoost over depth-4 decision trees on the same data.
abr = AdaBoostRegressor(
    DecisionTreeRegressor(max_depth=4),
    n_estimators=300,
    random_state=rng,
)
abr.fit(X, y)
y2 = abr.predict(X)
from forestlayer.layers.factory import MGSWindow, MeanPooling
from forestlayer.layers.layer import MultiGrainScanLayer, AutoGrowingCascadeLayer, PoolingLayer, ConcatLayer
from forestlayer.layers.graph import Graph
from forestlayer.estimators.estimator_configs import ExtraRandomForestConfig, RandomForestConfig, Basic4x2

# Load the sEMG data and report the array shapes.
x_train, y_train, x_test, y_test = uci_sEMG.load_data()

print("x_train.shape = {}, y_train.shape = {}".format(x_train.shape, y_train.shape))
print("x_test.shape = {}, y_test.shape = {}".format(x_test.shape, y_test.shape))

# Three scan windows of shape (1, width); every window is served by the same
# two forest configs.
windows = [MGSWindow((1, width)) for width in (157, 375, 750)]

rf1 = ExtraRandomForestConfig(n_folds=3, min_samples_leaf=10, max_features='auto')
rf2 = RandomForestConfig(n_folds=3, min_samples_leaf=10)
est_for_windows = [[rf1, rf2] for _ in windows]

mgs = MultiGrainScanLayer(
    windows=windows,
    est_for_windows=est_for_windows,
    n_class=6,
)

# Two independent mean-pooling instances for each window.
pools = [[MeanPooling(), MeanPooling()] for _ in windows]
pool_layer = PoolingLayer(pools=pools)
x_test = x_test.transpose((0, 3, 1, 2)) y_train = y_train.reshape((y_train.shape[0])) y_test = y_test.reshape((y_test.shape[0])) x_train = x_train.reshape(50000, -1, 32, 32) x_test = x_test.reshape(10000, -1, 32, 32) x_train = x_train[:200, :, :, :] x_test = x_test[:100, :, :, :] y_train = y_train[:200] y_test = y_test[:100] print(x_train.shape, y_train.shape, 'train') print(x_test.shape, y_test.shape, 'test') rf1 = ExtraRandomForestConfig(n_folds=3, n_jobs=-1, min_samples_leaf=10, max_features='auto') rf2 = RandomForestConfig(n_folds=3, n_jobs=-1, min_samples_leaf=10) windows = [ Window(win_x=8, win_y=8, stride_x=2, stride_y=2, pad_x=0, pad_y=0), Window(11, 11, 2, 2), Window(16, 16, 2, 2) ] est_for_windows = [[rf1, rf2], [rf1, rf2], [rf1, rf2]] data_save_dir = osp.join(get_data_save_base(), 'cifar10') model_save_dir = osp.join(get_model_save_base(), 'cifar10') mgs = MultiGrainScanLayer(windows=windows,
from forestlayer.estimators.estimator_configs import RandomForestConfig, ExtraRandomForestConfig
from forestlayer.layers.layer import AutoGrowingCascadeLayer
from forestlayer.layers.graph import Graph
import ray
import time

"""Stand alone mode"""
ray.init()
"""Cluster mode"""
# ray.init(redis_address="192.168.x.x:6379")

(x_train, y_train, x_test, y_test) = uci_adult.load_data()

# The timer starts after the data has been loaded.
start_time = time.time()

# Four extra-random forests followed by four random forests, all n_jobs=-1.
est_configs = [
    config_cls(n_jobs=-1)
    for config_cls in (ExtraRandomForestConfig, RandomForestConfig)
    for _ in range(4)
]

auto_cascade = AutoGrowingCascadeLayer(
    est_configs=est_configs,
    early_stopping_rounds=4,
    n_classes=2,
    distribute=True,
    seed=0,
)
(x_train, y_train), (x_test, y_test) = mnist.load_data() x_train = x_train.reshape(60000, -1, 28, 28) x_test = x_test.reshape(10000, -1, 28, 28) # small data for example. x_train = x_train[:600, :, :, :] x_test = x_test[:300, :, :, :] y_train = y_train[:600] y_test = y_test[:300] print(x_train.shape, 'train', x_train.dtype, getmbof(x_train)) print(x_test.shape, 'test', x_test.dtype, getmbof(x_test)) rf1 = ExtraRandomForestConfig(n_jobs=-1, min_samples_leaf=10, max_features="auto") rf2 = RandomForestConfig(n_jobs=-1, min_samples_leaf=10) windows = [ Window(win_x=7, win_y=7, stride_x=2, stride_y=2, pad_x=0, pad_y=0), Window(10, 10, 2, 2), Window(13, 13, 2, 2) ] est_for_windows = [[rf1, rf2], [rf1, rf2], [rf1, rf2]] mgs = MultiGrainScanLayer(windows=windows, est_for_windows=est_for_windows, n_class=10, distribute=True,