Exemplo n.º 1
0
class EXTRATREE():
    """Wrapper around an injected ``ExtraTreesRegressor`` class.

    The estimator class is supplied by the caller instead of being imported
    here. It is instantiated once with a fixed set of hyper-parameters and a
    worker count derived from the machine's CPU count.
    """

    def __init__(self, ExtraTreesRegressor, N):
        """Create the underlying model.

        Args:
            ExtraTreesRegressor: Callable (normally the estimator class) that
                is invoked with the keyword arguments assembled below.
            N: Divisor applied to ``multiprocessing.cpu_count()``; the
                rounded-up quotient is stored as ``cores_number`` and passed
                to the model as ``n_jobs``.
        """
        available_cpus = multiprocessing.cpu_count()
        self.cores_number = int(np.ceil(available_cpus / N))

        # Keyword arguments handed to the estimator constructor in one go.
        settings = dict(
            bootstrap=False,
            criterion='mse',
            max_depth=None,
            max_features='auto',
            max_leaf_nodes=None,
            min_impurity_decrease=0.0,
            min_impurity_split=None,
            min_samples_leaf=1,
            min_samples_split=2,
            min_weight_fraction_leaf=0.0,
            n_estimators=200,
            n_jobs=self.cores_number,
            oob_score=False,
            random_state=None,
            verbose=True,
            warm_start=False,
        )
        self.model = ExtraTreesRegressor(**settings)

        print("ExtraTreesRegressor Cores: ", self.cores_number)

    def fit(self, X_train, y_train, X_test, y_test, error_type="MAE"):
        """Select the split criterion from ``error_type`` and fit the model.

        Args:
            X_train: Training features, forwarded to ``model.fit``.
            y_train: Training targets, forwarded to ``model.fit``.
            X_test: Accepted but not used by this method.
            y_test: Accepted but not used by this method.
            error_type: ``"MSE"`` or ``"MAE"``; mapped to the criterion
                strings ``"mse"`` / ``"mae"``.

        Raises:
            KeyError: ``error_type`` is neither ``"MSE"`` nor ``"MAE"``.
        """
        criterion_by_error = {"MSE": "mse", "MAE": "mae"}
        self.model.set_params(criterion=criterion_by_error[error_type])
        self.model.fit(X_train, y_train)

    def predict(self, X_test):
        """Return the wrapped model's prediction for ``X_test``."""
        return self.model.predict(X_test)
    def __init__(self, nb_classes, bags=1, params={}):

        # params
        self.params = params

        #common
        self.nb_classes = nb_classes
        self.bags = bags
        self.bags_models = tuple()
        self.train_y = None

        for bag in range(self.bags):
            model = ExtraTreesRegressor()
            model.set_params(**self.params)
            model.set_params(random_state = (self.params['random_state'] + bag))
            self.bags_models = self.bags_models + (model, )
Exemplo n.º 3
0
        def Extra_trees_regression(X_train, y_train, X_test, params):
            """Fit an ExtraTreesRegressor and predict for ``X_test``.

            The tuning strategy is selected by ``hyperparameters``, a name
            read from the enclosing scope:

            * ``'RandomGridSearch'`` - ``RandomizedSearchCV`` with 5 sampled
              candidates and 3 folds.
            * ``'GridSearch'`` - exhaustive ``GridSearchCV`` with 3 folds.
            * ``'Custom'`` - no search; ``params`` is applied to the
              estimator, which is scored with a shuffled 3-fold
              cross-validation and then fitted on all training data.

            Args:
                X_train: Training features.
                y_train: Training targets. Flattened with ``np.ravel`` in the
                    ``'GridSearch'`` fit and the final ``'Custom'`` fit only.
                X_test: Features to predict for.
                params: Estimator parameters; used only in ``'Custom'`` mode.

            Returns:
                ``(predicted, validation_score)``. In the two search modes the
                score is the optimizer's ``best_score_``; in ``'Custom'`` mode
                it is whatever ``cross_val_score`` returns. Scoring is
                ``'neg_mean_absolute_error'`` in every mode.

            Raises:
                ValueError: ``hyperparameters`` is not one of the three modes.
            """
            if hyperparameters in ('RandomGridSearch', 'GridSearch'):
                # Both search modes explore the same grid with the same
                # base model, so they are set up once.
                param_grid = {
                    'max_depth': [5, 10, 15, 20, 25],
                    'min_samples_split': [2, 5, 10],
                    'max_leaf_nodes': [10, 50, 100],
                }
                # Model to be trained
                estimator = ExtraTreesRegressor(n_estimators=200, n_jobs=8)

                # `iid` is deliberately not passed: 'deprecated' was only the
                # sentinel default and the argument no longer exists in
                # scikit-learn >= 0.24, where passing it raises TypeError.
                if hyperparameters == 'RandomGridSearch':
                    # Random search over the grid with 3-fold cross-validation
                    optimizer = RandomizedSearchCV(
                        estimator,
                        param_grid,
                        n_iter=5,
                        cv=3,
                        scoring='neg_mean_absolute_error')
                    optimizer.fit(X_train, y_train)
                else:
                    # Full search over the grid with 3-fold cross-validation
                    optimizer = GridSearchCV(
                        estimator,
                        param_grid,
                        cv=3,
                        scoring='neg_mean_absolute_error')
                    optimizer.fit(X_train, np.ravel(y_train))

                regression = optimizer.best_estimator_
                predicted = regression.predict(X_test)
                validation_score = optimizer.best_score_
            elif hyperparameters == 'Custom':
                estimator = ExtraTreesRegressor()
                # Apply the requested parameters
                estimator.set_params(**params)

                # Cross-validation check
                fold = KFold(n_splits=3, shuffle=True)
                validation_score = cross_val_score(
                    estimator=estimator,
                    X=X_train,
                    y=y_train,
                    cv=fold,
                    scoring='neg_mean_absolute_error')

                # Train the model on all of the data
                estimator.fit(X_train, np.ravel(y_train))
                predicted = estimator.predict(X_test)
            else:
                # Without this branch an unknown mode would surface as an
                # UnboundLocalError at the return statement.
                raise ValueError(
                    "Unknown hyperparameters mode: %r" % (hyperparameters,))
            return (predicted, validation_score)
Exemplo n.º 4
0
        def extra_trees_regression(X_train,
                                   y_train,
                                   X_test,
                                   params,
                                   use_cv: bool = True):
            """Fit an ExtraTreesRegressor and predict for ``X_test``.

            With ``use_cv=False`` a single model is fitted and scored on the
            training data itself. Otherwise the strategy is selected by
            ``hyperparameters``, a name read from the enclosing scope:

            * ``'RandomGridSearch'`` - ``RandomizedSearchCV`` with 5 sampled
              candidates and 3 folds.
            * ``'GridSearch'`` - exhaustive ``GridSearchCV`` with 3 folds.
            * ``'Custom'`` - no search; ``params`` is applied to the
              estimator, which is scored with a shuffled 3-fold
              cross-validation and then fitted on all training data.

            Args:
                X_train: Training features.
                y_train: Training targets. Flattened with ``np.ravel`` in the
                    ``'GridSearch'`` fit and the final ``'Custom'`` fit only.
                X_test: Features to predict for.
                params: Estimator parameters, or ``None`` for the estimator
                    defaults. Used when ``use_cv`` is ``False`` and in
                    ``'Custom'`` mode.
                use_cv: Pass ``False`` to skip cross-validation entirely.

            Returns:
                ``(predicted, validation_score)``. Without cross-validation
                the score is the mean absolute error on the training set
                (non-negative). In the search modes it is ``best_score_`` and
                in ``'Custom'`` mode it is whatever ``cross_val_score``
                returns, both using ``'neg_mean_absolute_error'``.

            Raises:
                ValueError: ``use_cv`` is not ``False`` and
                    ``hyperparameters`` is not one of the three modes.
            """
            # If there are not enough points for cross validation
            if use_cv is False:
                if params is None:
                    model = ExtraTreesRegressor()
                else:
                    model = ExtraTreesRegressor(**params)
                model.fit(X_train, y_train)
                predicted = model.predict(X_test)

                # Calculate score on train
                train_predicted = model.predict(X_train)
                validation_score = mean_absolute_error(
                    np.ravel(y_train), np.ravel(train_predicted))
                return predicted, validation_score

            if hyperparameters in ('RandomGridSearch', 'GridSearch'):
                # Both search modes explore the same grid with the same
                # base model, so they are set up once.
                param_grid = {
                    'max_depth': [5, 10, 15, 20, 25],
                    'min_samples_split': [2, 5, 10],
                    'max_leaf_nodes': [10, 50, 100],
                }
                # Set the model to be trained
                estimator = ExtraTreesRegressor(n_estimators=50, n_jobs=-1)

                # `iid` is deliberately not passed: 'deprecated' was only the
                # sentinel default and the argument no longer exists in
                # scikit-learn >= 0.24, where passing it raises TypeError.
                if hyperparameters == 'RandomGridSearch':
                    # Random grid search with 3-fold cross-validation
                    optimizer = RandomizedSearchCV(
                        estimator,
                        param_grid,
                        n_iter=5,
                        cv=3,
                        scoring='neg_mean_absolute_error')
                    optimizer.fit(X_train, y_train)
                else:
                    # Full grid search with 3-fold cross-validation
                    optimizer = GridSearchCV(
                        estimator,
                        param_grid,
                        cv=3,
                        scoring='neg_mean_absolute_error')
                    optimizer.fit(X_train, np.ravel(y_train))

                regression = optimizer.best_estimator_
                predicted = regression.predict(X_test)
                validation_score = optimizer.best_score_
            elif hyperparameters == 'Custom':
                estimator = ExtraTreesRegressor()
                # Set the params; None means "keep the defaults", matching
                # how the no-CV path above treats it.
                if params is not None:
                    estimator.set_params(**params)

                # Cross-validation
                fold = KFold(n_splits=3, shuffle=True)
                validation_score = cross_val_score(
                    estimator=estimator,
                    X=X_train,
                    y=y_train,
                    cv=fold,
                    scoring='neg_mean_absolute_error')
                estimator.fit(X_train, np.ravel(y_train))
                predicted = estimator.predict(X_test)
            else:
                # Without this branch an unknown mode would surface as an
                # UnboundLocalError at the return statement.
                raise ValueError(
                    "Unknown hyperparameters mode: %r" % (hyperparameters,))
            return predicted, validation_score
Exemplo n.º 5
0
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
import pickle

# Load features and targets, then hold out 20% of the rows for scoring.
x, y = datahelper.get_xy('data/', num_hours=3, error_minutes=15)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

# n_jobs=-1 is used for parallel calculations without limit
extra_trees = ExtraTreesRegressor(n_jobs=-1)

# Candidate forest sizes: 10, 20, ..., 990.
estimators = np.arange(10, 1000, 10)

# Position of each candidate in `estimators` -> score on the held-out split.
scores = dict()
for current_index, n in enumerate(estimators):
    extra_trees.set_params(n_estimators=n)
    extra_trees.fit(x_train, y_train)
    scores[current_index] = extra_trees.score(x_test, y_test)
# Leave the counter at the number of candidates evaluated.
current_index = len(scores)

# (position, score) pairs ordered from the highest score to the lowest.
sorted_by_scores = sorted(scores.items(),
                          key=lambda item: item[1],
                          reverse=True)

# Refit the model with the best-scoring number of estimators.
index, score = sorted_by_scores[0]
extra_trees.set_params(n_estimators=estimators[index])
extra_trees.fit(x_train, y_train)

filename = '../models/extra_trees_model.sav'
print('Saving model with ', estimators[index], ' estimators to file ',
      filename)
with open(filename, 'wb') as h:
Exemplo n.º 6
0
import ch9util
from tempfile import mkdtemp
import os
import joblib

# Train/test split comes from the project helper (contents not shown here).
X_train, X_test, y_train, y_test = ch9util.temp_split()

# Grid explored by the search below.
# NOTE(review): newer scikit-learn releases reject min_samples_split=1
# (an integer value must be >= 2) - confirm the targeted version.
params = {
    'min_samples_split': [1, 3],
    'bootstrap': [True, False],
    'min_samples_leaf': [3, 4]
}

gscv = GridSearchCV(ExtraTreesRegressor(random_state=41),
                    param_grid=params,
                    cv=5)

gscv.fit(X_train, y_train)
preds = gscv.predict(X_test)
ch9util.npy_save('etr.npy', preds)

# Round-trip the best parameters through a pickle in a temporary directory.
# The directory is named `tmp_dir` so the builtin `dir` is not shadowed.
# `params` is rebound here from the search grid to the loaded best values.
tmp_dir = mkdtemp()
pkl = os.path.join(tmp_dir, 'params.pkl')
joblib.dump(gscv.best_params_, pkl)
params = joblib.load(pkl)
print('Best params', gscv.best_params_)
print('From pkl', params)

# Rebuild an estimator from the loaded parameters and compare predictions.
est = ExtraTreesRegressor(random_state=41)
est.set_params(**params)
est.fit(X_train, y_train)
preds2 = est.predict(X_test)

# Use the absolute difference: a signed max would report no difference
# whenever preds2 is larger than preds everywhere.
print('Max diff', abs(preds - preds2).max())
from sklearn.grid_search import GridSearchCV
from sklearn.ensemble import ExtraTreesRegressor
import ch9util
from tempfile import mkdtemp
import os
import joblib

# Train/test split comes from the project helper (contents not shown here).
X_train, X_test, y_train, y_test = ch9util.temp_split()

# Grid explored by the search below.
# NOTE(review): newer scikit-learn releases reject min_samples_split=1
# (an integer value must be >= 2) - confirm the targeted version.
params = {'min_samples_split': [1, 3],
          'bootstrap': [True, False],
          'min_samples_leaf': [3, 4]}

gscv = GridSearchCV(ExtraTreesRegressor(random_state=41),
                    param_grid=params, cv=5)

gscv.fit(X_train, y_train)
preds = gscv.predict(X_test)
ch9util.npy_save('etr.npy', preds)

# Round-trip the best parameters through a pickle in a temporary directory.
# The directory is named `tmp_dir` so the builtin `dir` is not shadowed.
# `params` is rebound here from the search grid to the loaded best values.
tmp_dir = mkdtemp()
pkl = os.path.join(tmp_dir, 'params.pkl')
joblib.dump(gscv.best_params_, pkl)
params = joblib.load(pkl)
print('Best params', gscv.best_params_)
print('From pkl', params)

# Rebuild an estimator from the loaded parameters and compare predictions.
est = ExtraTreesRegressor(random_state=41)
est.set_params(**params)
est.fit(X_train, y_train)
preds2 = est.predict(X_test)

# Use the absolute difference: a signed max would report no difference
# whenever preds2 is larger than preds everywhere.
print('Max diff', abs(preds - preds2).max())