Example #1
# assumed import: 'Op' presumably aliases scikit-learn's ExtraTreesRegressor;
# the snippet's original imports are not shown
from sklearn.ensemble import ExtraTreesRegressor as Op


class ExtraTreesRegressorImpl():

    def __init__(self, n_estimators=10, criterion='mse', max_depth=None,
                 min_samples_split=2, min_samples_leaf=1,
                 min_weight_fraction_leaf=0.0, max_features='auto',
                 max_leaf_nodes=None, min_impurity_decrease=0.0,
                 min_impurity_split=None, bootstrap=False, oob_score=False,
                 n_jobs=None, random_state=None, verbose=0, warm_start=False):
        self._hyperparams = {
            'n_estimators': n_estimators,
            'criterion': criterion,
            'max_depth': max_depth,
            'min_samples_split': min_samples_split,
            'min_samples_leaf': min_samples_leaf,
            'min_weight_fraction_leaf': min_weight_fraction_leaf,
            'max_features': max_features,
            'max_leaf_nodes': max_leaf_nodes,
            'min_impurity_decrease': min_impurity_decrease,
            'min_impurity_split': min_impurity_split,
            'bootstrap': bootstrap,
            'oob_score': oob_score,
            'n_jobs': n_jobs,
            'random_state': random_state,
            'verbose': verbose,
            'warm_start': warm_start}
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if (y is not None):
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
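
A minimal usage sketch for the wrapper above, assuming the Op alias from the
import sketched at the top of the example; the data here is synthetic and for
illustration only:

import numpy as np

X = np.random.rand(100, 4)          # synthetic features
y = np.random.rand(100)             # synthetic targets

model = ExtraTreesRegressorImpl(n_estimators=25, random_state=0)
model.fit(X, y)
preds = model.predict(X[:5])        # array of 5 predictions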
Example #2

# assumed imports; 'Estimator' is an application-specific base class whose
# definition is not shown in the source
import numpy as np
from sklearn.ensemble import ExtraTreesRegressor


class ExtremelyRandomizeTreeEstimator(Estimator):
    def __init__(self):
        self.estimator = ExtraTreesRegressor(n_estimators=30)
        self.initialized = False

    def __call__(self, state, action):
        if self.initialized:
            x = np.array(state + [action[0], action[1]]).reshape(1, -1)
            return self.estimator.predict(x)[0]
        else:
            return 0

    def train(self, train_in, train_out):
        self.initialized = True
        train_in_formatted = np.array(train_in)
        self.estimator.fit(train_in_formatted, train_out)
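
A short usage sketch of the estimator above as a state-action value function;
the state and action shapes are illustrative assumptions, and the Estimator
base class from the snippet must already be in scope:

# four training rows: two state features plus two action components each
train_in = [[0.1, 0.2, 0.0, 1.0], [0.3, 0.1, 1.0, 0.0],
            [0.5, 0.4, 0.0, 0.0], [0.9, 0.7, 1.0, 1.0]]
train_out = [0.5, -0.2, 0.1, 0.8]   # observed returns

q = ExtremelyRandomizeTreeEstimator()
q.train(train_in, train_out)
value = q([0.1, 0.2], (0.0, 1.0))   # state as a list, action as a pair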
Example #3
 def fit(self, X, y=None):
     # 'SKLModel' presumably aliases scikit-learn's ExtraTreesRegressor
     self._sklearn_model = SKLModel(**self._hyperparams)
     if (y is not None):
         self._sklearn_model.fit(X, y)
     else:
         self._sklearn_model.fit(X)
     return self
Example #4
 def __init__(
     self,
     sc=None,
     partitions="auto",
     n_estimators=100,
     criterion="mse",
     max_depth=None,
     min_samples_split=2,
     min_samples_leaf=1,
     min_weight_fraction_leaf=0.0,
     max_features="auto",
     max_leaf_nodes=None,
     min_impurity_decrease=0.0,
     min_impurity_split=None,
     bootstrap=False,
     oob_score=False,
     n_jobs=None,
     random_state=None,
     verbose=0,
     warm_start=False,
 ):
     ExtraTreesRegressor.__init__(
         self,
         n_estimators=n_estimators,
         criterion=criterion,
         max_depth=max_depth,
         min_samples_split=min_samples_split,
         min_samples_leaf=min_samples_leaf,
         min_weight_fraction_leaf=min_weight_fraction_leaf,
         max_features=max_features,
         max_leaf_nodes=max_leaf_nodes,
         min_impurity_decrease=min_impurity_decrease,
         min_impurity_split=min_impurity_split,
         bootstrap=bootstrap,
         oob_score=oob_score,
         n_jobs=n_jobs,
         random_state=random_state,
         verbose=verbose,
         warm_start=warm_start,
     )
     # 'sc' is presumably a SparkContext, and 'partitions' the number of
     # partitions used to distribute tree construction across workers
     self.sc = sc
     self.partitions = partitions
Example #5
def create_model(min_split=186, njobs=1, verbose=False):
    regressor_params = {
        'n_estimators': 50,
        'criterion': 'mse',
        'min_samples_split': min_split,
        'min_samples_leaf': 1,
        'n_jobs': njobs,
        'verbose': verbose
    }
    model = ExtraTreesRegressor(**regressor_params)
    return model
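
Note that criterion='mse' was renamed to 'squared_error' in scikit-learn 1.0
and the old name was removed in 1.2, so the function above fails on
scikit-learn >= 1.2. A version-tolerant variant might look like this (a
sketch, not from the source):

import sklearn
from sklearn.ensemble import ExtraTreesRegressor


def create_model_compat(min_split=186, njobs=1, verbose=False):
    # pick whichever criterion name the installed scikit-learn accepts
    crit = 'squared_error' if int(sklearn.__version__.split('.')[0]) >= 1 else 'mse'
    return ExtraTreesRegressor(n_estimators=50,
                               criterion=crit,
                               min_samples_split=min_split,
                               min_samples_leaf=1,
                               n_jobs=njobs,
                               verbose=verbose)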
Example #7
    "Passive Aggressive",
    "SGD",
    "Theil-Sen",
    "RANSAC",
    "K-Neighbors",
    "Radius Neighbors",
    "MLP",
    "Decision Tree",
    "Extra Tree",
    "SVR"
]

classifiers = [
    RandomForestRegressor(n_estimators=200, n_jobs=5,
                          random_state=randomstate),
    ExtraTreesRegressor(n_estimators=200, n_jobs=5, random_state=randomstate),
    # GradientBoostingRegressor(random_state=randomstate),    # learning_rate is a hyper-parameter in the range (0.0, 1.0]
    # HistGradientBoostingClassifier(random_state=randomstate),    # learning_rate is a hyper-parameter in the range (0.0, 1.0]
    AdaBoostRegressor(n_estimators=200, random_state=randomstate),
    GaussianProcessRegressor(normalize_y=True),
    ARDRegression(),
    # HuberRegressor(),   # epsilon:  greater than 1.0, default 1.35
    LinearRegression(n_jobs=5),
    PassiveAggressiveRegressor(
        random_state=randomstate),  # C: 0.25, 0.5, 1, 5, 10
    SGDRegressor(random_state=randomstate),
    TheilSenRegressor(n_jobs=5, random_state=randomstate),
    RANSACRegressor(random_state=randomstate),
    KNeighborsRegressor(
        weights='distance'),  # n_neighbors: 3, 6, 9, 12, 15, 20
    RadiusNeighborsRegressor(weights='distance'),  # radius: 1, 2, 5, 10, 15
Example #8

# the start of this snippet is truncated; these are the trailing keyword
# arguments of a boosted-tree regressor constructor (presumably LightGBM,
# given y_pred_lgb below)
                    max_depth=7,
                    n_estimators=200,
                    min_child_weight=10,
                    subsample=0.7,
                    colsample_bytree=0.7,
                    reg_alpha=0,
                    reg_lambda=0.5)
reg.fit(X_train, y_train)
end = time.time()
y_pred_lgb = reg.predict(X_test)
print(metrics.mean_squared_error(y_test, y_pred_lgb))
print(end - start)

start = time.time()
reg = ExtraTreesRegressor(n_estimators=100,
                          max_depth=7,
                          min_samples_leaf=10,
                          n_jobs=8)
reg.fit(X_train, y_train)
end = time.time()
y_pred = reg.predict(X_test)
print(metrics.mean_squared_error(y_test, y_pred))
print(end - start)

start = time.time()
reg = KNeighborsRegressor(n_neighbors=4, algorithm='kd_tree')
reg.fit(X_train, y_train)
end = time.time()
y_pred = reg.predict(X_test)
print(metrics.mean_squared_error(y_test, y_pred))
print(end - start)
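
The fit/predict/MSE timing pattern above repeats verbatim for each model; a
small helper (a sketch, assuming the same time and metrics imports and the
same train/test splits) removes the duplication:

def time_and_score(reg, X_train, y_train, X_test, y_test):
    # time only the fit, as the snippets above do, then report test MSE
    start = time.time()
    reg.fit(X_train, y_train)
    elapsed = time.time() - start
    mse = metrics.mean_squared_error(y_test, reg.predict(X_test))
    print(mse)
    print(elapsed)
    return mse, elapsed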
Example #9
numfeat = 10

qtfm = PowerTransformer(method='yeo-johnson')
y_train = np.squeeze(qtfm.fit_transform(y_train_tmp.values.reshape(-1, 1)))

# 'sel' is presumably a feature-selection routine (skfeature-style signature);
# its import is not shown in the source
selidx, selscore, _ = sel(X_train.values, y_train, n_selected_features=numfeat)
selscoredf = pd.DataFrame(data=np.transpose(
    np.vstack((X_train.columns[selidx].values, selscore))),
                          columns=['Feature', 'Score'])

X_train_selected = X_train.iloc[:, selidx[0:numfeat]]

print(X_train_selected.columns.values)

print("Train classifier...")
clf = ExtraTreesRegressor(n_estimators=200, n_jobs=5, random_state=randomstate)
clf.fit(X_train, y_train)
# save classifier for further use
dump(clf, clfpath)
print("Training complete...")
# clf = load(clfpath)

# VALIDATION SET
# load validation data
validationfeatures = pd.read_csv(
    "/media/yannick/c4a7e8d3-9ac5-463f-b6e6-92e216ae6ac0/BRATS/BraTS2020/validationfeat_normalized.csv",
    index_col="ID")

y_pred_validation = clf.predict(validationfeatures)
pred_validation_df = pd.DataFrame(data=zip(validationfeatures.index.values,
                                           y_pred_validation),
Example #10
# inferred signature; the original definition line is truncated in the source
def build_auto(regressor, name):
    regressor = regressor.fit(auto_X, auto_y)
    store_pkl(regressor, name + ".pkl")
    mpg = DataFrame(regressor.predict(auto_X), columns=["mpg"])
    store_csv(mpg, name + ".csv")


build_auto(DecisionTreeRegressor(random_state=13, min_samples_leaf=5),
           "DecisionTreeAuto")
build_auto(
    BaggingRegressor(DecisionTreeRegressor(random_state=13,
                                           min_samples_leaf=5),
                     random_state=13,
                     n_estimators=3,
                     max_features=0.5), "DecisionTreeEnsembleAuto")
build_auto(ElasticNetCV(random_state=13), "ElasticNetAuto")
build_auto(ExtraTreesRegressor(random_state=13, min_samples_leaf=5),
           "ExtraTreesAuto")
build_auto(GradientBoostingRegressor(random_state=13, init=None),
           "GradientBoostingAuto")
build_auto(LassoCV(random_state=13), "LassoAuto")
build_auto(LinearRegression(), "LinearRegressionAuto")
build_auto(
    BaggingRegressor(LinearRegression(), random_state=13, max_features=0.5),
    "LinearRegressionEnsembleAuto")
build_auto(RandomForestRegressor(random_state=13, min_samples_leaf=5),
           "RandomForestAuto")
build_auto(RidgeCV(), "RidgeAuto")
build_auto(XGBRegressor(objective="reg:linear"), "XGBAuto")

housing_df = load_csv("Housing.csv")
Example #11
			'Binarizer':Binarizer(),
			'Birch':Birch(),
			'CCA':CCA(),
			'CalibratedClassifierCV':CalibratedClassifierCV(),
			'DBSCAN':DBSCAN(),
			'DPGMM':DPGMM(),
			'DecisionTreeClassifier':DecisionTreeClassifier(),
			'DecisionTreeRegressor':DecisionTreeRegressor(),
			'DictionaryLearning':DictionaryLearning(),
			'ElasticNet':ElasticNet(),
			'ElasticNetCV':ElasticNetCV(),
			'EmpiricalCovariance':EmpiricalCovariance(),
			'ExtraTreeClassifier':ExtraTreeClassifier(),
			'ExtraTreeRegressor':ExtraTreeRegressor(),
			'ExtraTreesClassifier':ExtraTreesClassifier(),
			'ExtraTreesRegressor':ExtraTreesRegressor(),
			'FactorAnalysis':FactorAnalysis(),
			'FastICA':FastICA(),
			'FeatureAgglomeration':FeatureAgglomeration(),
			'FunctionTransformer':FunctionTransformer(),
			'GMM':GMM(),
			'GaussianMixture':GaussianMixture(),
			'GaussianNB':GaussianNB(),
			'GaussianProcess':GaussianProcess(),
			'GaussianProcessClassifier':GaussianProcessClassifier(),
			'GaussianProcessRegressor':GaussianProcessRegressor(),
			'GaussianRandomProjection':GaussianRandomProjection(),
			'GenericUnivariateSelect':GenericUnivariateSelect(),
			'GradientBoostingClassifier':GradientBoostingClassifier(),
			'GradientBoostingRegressor':GradientBoostingRegressor(),
			'GraphLasso':GraphLasso(),
Example #12

 def __init__(self):
     self.estimator = ExtraTreesRegressor(n_estimators=30)
     self.initialized = False
Example #13
def run_tuning(dataset,
               nmin,
               half,
               n_jobs=1,
               output_path='',
               output_name='',
               track_file_name='',
               rt_file_name='',
               data_path=''):

    if len(dataset) == 0:
        # Create dataset
        dataset, _ = prepare_dataset(os.path.join(data_path,
                                                  track_file_name + '.csv'),
                                     os.path.join(data_path,
                                                  rt_file_name + '.csv'),
                                     reward_function='progress',
                                     knn_actions=True)

    X = dataset[state_cols + action_cols].values
    t = dataset['r'].values
    n_samples = len(t)
    ids = list(range(n_samples))

    if half:
        np.random.shuffle(ids)
        ids_A = ids[:math.floor(n_samples / 2)]
        ids_B = ids[math.floor(n_samples / 2):]
    else:
        ids_A = ids

    mdl = ExtraTreesRegressor(n_estimators=100, criterion='mse', n_jobs=n_jobs)

    gcv = GridSearchCV(mdl, {'min_samples_leaf': nmin},
                       cv=10,
                       scoring='neg_mean_squared_error')
    # Fit the models
    gcv.fit(X[ids_A, :], t[ids_A])

    if half:
        gcv_list = []
        gcv_list.append(gcv)

        gcv = GridSearchCV(mdl, {'min_samples_leaf': nmin},
                           cv=10,
                           scoring='neg_mean_squared_error')
        # Fit the models
        gcv.fit(X[ids_B], t[ids_B])
        gcv_list.append(gcv)
        to_save = gcv_list
    else:
        to_save = gcv

    if output_path != '':

        # Save the results
        with open(os.path.join(output_path, output_name + '.pkl'),
                  'wb') as out:
            pickle.dump(to_save, out, pickle.HIGHEST_PROTOCOL)
        print('Saved cross val results as {}'.format(output_name))

    if half:
        return gcv_list
    else:
        return gcv
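
The returned GridSearchCV object(s) expose the selected leaf size directly; a
minimal inspection sketch, assuming dataset is the prepared DataFrame the
function expects and nmin is a list of candidate values:

gcv = run_tuning(dataset, nmin=[2, 5, 10, 20], half=False, n_jobs=4)
print(gcv.best_params_['min_samples_leaf'])   # selected leaf size
print(-gcv.best_score_)                       # cross-validated MSE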
Example #14
def test_run(fn, features, type):
    """ load dataset, build feature set, and do learning
        Parameters
        ----------
        fn: file name of dataset
        features: a list of list, each of which is a feature list for different models
        type: str for indicating feature set
        
        Returns
        -------
        predictions and feature-engineered dataset are saved to files
    """
    np.set_printoptions(precision=4)
    print('test_run ' + type)
    df = load_data(fn)
    check_df(df)
    df = feature_engineering(df)

    print(df.columns)
    #    print(df.head())
    #    print(df.groupby(['peak_hr'])['cnt'].agg(sum))
    y_pred_list = []
    for i, est in enumerate(
        (DecisionTreeRegressor(min_samples_split=20),
         ExtraTreesRegressor(n_estimators=100,
                             max_depth=None,
                             min_samples_split=1,  # note: modern scikit-learn requires >= 2
                             random_state=1234),
         RandomForestRegressor(n_estimators=1000,
                               max_depth=15,
                               random_state=1234,
                               min_samples_split=3,
                               n_jobs=-1),
         GradientBoostingRegressor(n_estimators=150,
                                   max_depth=10,
                                   random_state=0,
                                   min_samples_leaf=20,
                                   learning_rate=0.1,
                                   subsample=0.7,
                                   loss='ls'), svm.SVR(C=30))):
        #        print(features[i])
        df, X_train, X_test, y_train, y_test, y_train_cas, y_test_cas, y_train_reg, y_test_reg, time_test = split_data(
            df, features=features[i])
        y_pred, mse = predict_evaluate(est, X_train, y_train, X_test, y_test)
        est_name = str(est).split('(')[0]
        print(type, est_name, np.round(mse, 4))
        """ feature importance
        if est_name != 'SVR':
            # print out feature importance
            sfi = sorted([(x[0], float('%.4f'%x[1])) for x in zip(features[i], est.feature_importances_)], key=lambda x: x[1], reverse=True)
            print(sfi)
            print([x[0] for x in sfi])
        """
        y_pred_list.append([est_name, mse, y_pred])

    # blending models
    y_pred_blend = np.log1p(.2 * (np.exp(y_pred_list[2][2]) - 1) + .8 *
                            (np.exp(y_pred_list[3][2]) - 1))
    print(
        type + ' blending: 0.2*' + y_pred_list[2][0] + ' + 0.8*' +
        y_pred_list[3][0],
        metrics.mean_squared_error(y_test, y_pred_blend).round(4))
    y_pred_blend = np.log1p(.3 * (np.exp(y_pred_list[1][2]) - 1) + .7 *
                            (np.exp(y_pred_list[3][2]) - 1))
    print(
        type + ' blending: 0.3*' + y_pred_list[1][0] + ' + 0.7*' +
        y_pred_list[3][0],
        metrics.mean_squared_error(y_test, y_pred_blend).round(4))
    y_pred_blend = np.log1p(.3 * (np.exp(y_pred_list[3][2]) - 1) + .7 *
                            (np.exp(y_pred_list[4][2]) - 1))
    print(
        type + ' blending: 0.3*' + y_pred_list[3][0] + ' + 0.7*' +
        y_pred_list[4][0],
        metrics.mean_squared_error(y_test, y_pred_blend).round(4))
    y_pred_blend = np.log1p(.6 * (np.exp(y_pred_list[3][2]) - 1) + .4 *
                            (np.exp(y_pred_list[4][2]) - 1))
    print(
        type + ' blending: 0.6*' + y_pred_list[3][0] + ' + 0.4*' +
        y_pred_list[4][0],
        metrics.mean_squared_error(y_test, y_pred_blend).round(4))
    dff = pd.DataFrame({
        'datetime': time_test[:, 0],
        'mnth': time_test[:, 1],
        'hr': time_test[:, 2],
        'cnt': np.expm1(y_test),
        'prediction': y_pred_blend
    })
    dff.to_csv('../output/prediction_blended.csv',
               index=False,
               columns=['datetime', 'mnth', 'hr', 'cnt', 'prediction'])
    print('blended predictions saved in ../output/prediction_blended.csv')
    df.to_csv('../data/hour_ext.csv')
    print('extended dataset saved in ../data/hour_ext.csv')
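
The blends above average predictions in the original count scale and then map
back to log scale, because the models were trained on log1p-transformed
counts. A small helper capturing that step (a sketch, not from the source):

def blend_log_predictions(pred_a, pred_b, w_a):
    # undo log1p, mix the count-scale predictions, re-apply log1p
    return np.log1p(w_a * np.expm1(pred_a) + (1.0 - w_a) * np.expm1(pred_b))

For instance, blend_log_predictions(y_pred_list[2][2], y_pred_list[3][2], 0.2)
reproduces the first blend above.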