Example #1
def get_regressor(training_set):
    """
    Estimation of the value function using a regression algorithm.
    Training set contains tuples of (state, score)
    V: S -> R
    """
    clf = ExtraTreeRegressor()
    clf.fit(*zip(*training_set))
    return clf
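
A minimal usage sketch, assuming ExtraTreeRegressor is imported from sklearn.tree and states are fixed-length feature vectors:

from sklearn.tree import ExtraTreeRegressor

# Toy training set of (state, score) pairs.
training_set = [([0.0, 1.0], 0.5), ([1.0, 0.0], -0.2), ([1.0, 1.0], 0.9)]
V = get_regressor(training_set)
print(V.predict([[0.5, 0.5]]))  # estimated value of an unseen state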
Example #2
def init_ML_models():
    models = dict()
    models['lr'] = LinearRegression()
    models['lasso'] = Lasso()
    models['ridge'] = Ridge()
    models['en'] = ElasticNet()
    models['huber'] = HuberRegressor()
    models['llars'] = LassoLars()
    models['pa'] = PassiveAggressiveRegressor(max_iter=1000, tol=1e-3)
    #    models['knn']               = KNeighborsRegressor(n_neighbors=5)
    models['cart'] = DecisionTreeRegressor()
    models['extra'] = ExtraTreeRegressor()
    models['svmr'] = SVR()

    n_trees = 100
    models['ada'] = AdaBoostRegressor(n_estimators=n_trees)
    models['bag'] = BaggingRegressor(n_estimators=n_trees)
    models['rf'] = RandomForestRegressor(n_estimators=n_trees)
    models['et'] = ExtraTreesRegressor(n_estimators=n_trees)
    models['gbm'] = GradientBoostingRegressor(n_estimators=n_trees)

    return models
Example #3
def get_models(models=dict()):
    # linear models
    models['lr'] = LinearRegression()
    alpha = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    for a in alpha:
        models['lasso-' + str(a)] = Lasso(alpha=a)
    for a in alpha:
        models['ridge-' + str(a)] = Ridge(alpha=a)
    for a1 in alpha:
        for a2 in alpha:
            name = 'en-' + str(a1) + '-' + str(a2)
            models[name] = ElasticNet(alpha=a1, l1_ratio=a2)
    models['huber'] = HuberRegressor()
    models['lars'] = Lars()
    models['llars'] = LassoLars()
    models['pa'] = PassiveAggressiveRegressor(max_iter=1000, tol=1e-3)
    models['ransac'] = RANSACRegressor()
    models['sgd'] = SGDRegressor(max_iter=1000, tol=1e-3)
    models['theil'] = TheilSenRegressor()
    # non-linear models
    n_neighbors = range(1, 21)
    for k in n_neighbors:
        models['knn-' + str(k)] = KNeighborsRegressor(n_neighbors=k)
    models['cart'] = DecisionTreeRegressor()
    models['extra'] = ExtraTreeRegressor()
    models['svml'] = SVR(kernel='linear')
    models['svmp'] = SVR(kernel='poly')
    c_values = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    for c in c_values:
        models['svmr-' + str(c)] = SVR(C=c)
    # ensemble models
    n_trees = 100
    models['ada'] = AdaBoostRegressor(n_estimators=n_trees)
    models['bag'] = BaggingRegressor(n_estimators=n_trees)
    models['rf'] = RandomForestRegressor(n_estimators=n_trees)
    models['et'] = ExtraTreesRegressor(n_estimators=n_trees)
    models['gbm'] = GradientBoostingRegressor(n_estimators=n_trees)
    print('Defined %d models' % len(models))
    return models
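
A hedged usage sketch for the registry above: spot-check every model with 5-fold cross-validation, assuming X and y are a prepared feature matrix and target.

from sklearn.model_selection import cross_val_score

def evaluate_models(models, X, y):
    # Negated MAE so that higher scores are better.
    results = {}
    for name, model in models.items():
        scores = cross_val_score(model, X, y, cv=5,
                                 scoring='neg_mean_absolute_error')
        results[name] = scores.mean()
    return results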
Example #4
    def __init__(self,
                 n_estimators=100,
                 type_weight='nearest_neighbors',
                 n_neighbors='auto',
                 max_knn_sample="auto",
                 max_samples="auto",
                 contamination="auto",
                 max_features=1.,
                 bootstrap=False,
                 n_jobs=None,
                 behaviour='deprecated',
                 random_state=None,
                 verbose=0,
                 warm_start=False,
                 pos_label=None):
        super().__init__(
            base_estimator=ExtraTreeRegressor(max_features=1,
                                              splitter='random',
                                              random_state=random_state),
            # note: this max_features is the tree's own setting, unrelated to self.max_features
            bootstrap=bootstrap,
            bootstrap_features=False,
            n_estimators=n_estimators,
            max_samples=max_samples,
            max_features=max_features,
            warm_start=warm_start,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose)

        self.behaviour = behaviour
        self.contamination = contamination
        self.n_neighbors = n_neighbors
        self.max_knn_sample = max_knn_sample
        self.pos_label = pos_label
        self.type_weight = type_weight
        self.nn_ = None
        self.depths_ = None
        self.nn_weight_ = None
Example #5
def ExtraTree(X_train, X_test, y_train, y_test):
    reg = ExtraTreeRegressor()
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="ExtraTree",
            reg=reg,
            metrics=metrics,
            val_metrics=val_metrics)
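
printMetrics, getMetrics, and logSave are helpers from the surrounding project; a minimal sketch of plausible stand-ins (assumptions, not the originals) built on sklearn.metrics:

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def getMetrics(y_true, y_pred):
    # Gather a few standard regression metrics into a dict.
    return {'mae': mean_absolute_error(y_true, y_pred),
            'mse': mean_squared_error(y_true, y_pred),
            'r2': r2_score(y_true, y_pred)}

def printMetrics(y_true, y_pred):
    # Print each metric on its own line.
    for name, value in getMetrics(y_true, y_pred).items():
        print('%s: %.4f' % (name, value))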
Example #6
def fit_data(X, Y, type):
    global rnd
    error_list = []
    if type == "LinearRegression":
        data_fit = LinearRegression()

    elif type == "ExtraTreeRegressor":
        data_fit = ExtraTreeRegressor()

    elif type == "DecisionTreeRegressor":
        data_fit = DecisionTreeRegressor()

    elif type == "RandomForestRegressor":
        data_fit = RandomForestRegressor()

    elif type == "GradientBoostingRegressor":
        data_fit = GradientBoostingRegressor()

    elif type == "XGBRegressor":
        data_fit = XGBRegressor()

    for i in range(100, 1000, 1):
        rnd.append(i)
        x_train, x_test, y_train, y_test = train_test_split(X,
                                                            Y,
                                                            test_size=.3,
                                                            random_state=i)

        data_fit.fit(x_train, y_train)

        result = data_fit.predict(x_test)

        error_list.append(np.sqrt(mean_squared_error(y_test, result)))

    print("using " + type + " error is :")
    print(min(error_list))

    print("using randonstate = ", rnd[error_list.index(min(error_list))])
Example #7
def model_train(x, y, delay, params_dict, version_):
    if params_dict['model_name'] == 'LinearRegression':
        model = LinearRegression(fit_intercept=True,
                                 normalize=False,
                                 n_jobs=-1)  # 1. Linear regression
    elif params_dict['model_name'] == 'RandomForestRegressor':
        model = RandomForestRegressor(
            n_estimators=params_dict['n_estimators'],
            n_jobs=7,
            random_state=2017,
            max_depth=params_dict['max_depth'],
            oob_score=True,
            min_samples_split=params_dict['min_samples_split'],
            min_samples_leaf=params_dict['min_samples_leaf'])
    elif params_dict['model_name'] == 'ExtraTreeRegressor':
        model = ExtraTreeRegressor(
            max_depth=params_dict['max_depth'],
            random_state=2017,
            min_samples_split=params_dict['min_samples_split'],
            min_samples_leaf=params_dict['min_samples_leaf']
        )  # 9. ExtraTree (extremely randomized tree) regression
    else:
        model = RandomForestRegressor()
    model_n = 'Model_{}_{}_V{}.model'.format(delay + 1,
                                             params_dict['model_name'],
                                             version_)
    model_dir = fgl.MODEL_DIR + '/' + model_n
    print('Start Training Model ' + str(delay + 1) + '...', 'Time:',
          datetime.datetime.now())
    print(list(x.columns))
    best_model = model.fit(x, y)
    # print(best_model.feature_importances_)
    print('Model ' + str(delay + 1) + ' is ok!', 'Time:',
          datetime.datetime.now())
    joblib.dump(best_model, model_dir)
    print('Model ' + str(delay + 1) + ' saved!', 'Time:',
          datetime.datetime.now())
    return model_n, best_model, '-'.join(list(x.columns.values))
Example #8
def get_models_(models=dict()):
	# non-linear models
	models['knn'] = KNeighborsRegressor(n_neighbors=8)
	models['cart'] = DecisionTreeRegressor()
	models['extra'] = ExtraTreeRegressor()
	# ensemble models
	n_trees = 100 #500
	#models['ada'] = AdaBoostRegressor(n_estimators=n_trees)
	models['bag'] = BaggingRegressor(n_estimators=n_trees)
	models['rf'] = RandomForestRegressor(n_estimators=n_trees)
	#models['et'] = ExtraTreesRegressor(n_estimators=n_trees)
	models['gbm'] = GradientBoostingRegressor(n_estimators=n_trees)
    
	models['xgb'] = XGBRegressor(max_depth=8, n_estimators=n_trees,
                                 min_child_weight=300, colsample_bytree=0.8, 
                                 subsample=0.8, eta=0.3, 
                                 seed=42, silent=True)
    

	models['lgbm'] = LGBMRegressor(n_jobs=-1,        random_state=0, 
                                   n_estimators=n_trees, learning_rate=0.001, 
                                   num_leaves=2**6,  subsample=0.9, 
                                   subsample_freq=1, colsample_bytree=1.)
Example #9
    def run(self, configuration: Configuration) -> None:
        inputs = torch.from_numpy(np.load(configuration.inputs))
        missing_mask = torch.from_numpy(np.load(configuration.missing_mask))

        assert inputs.shape == missing_mask.shape

        # the model needs np.nan in the missing positions to work
        inputs = compose_with_mask(missing_mask,
                                   where_one=torch.empty_like(inputs).fill_(np.nan),
                                   where_zero=inputs,
                                   differentiable=False)  # cannot be differentiable with nans!

        # create the model
        model = IterativeImputer(random_state=configuration.get("seed", 0),
                                 estimator=ExtraTreeRegressor(),
                                 missing_values=np.nan)

        # fit expects numpy arrays, so convert back from torch
        model.fit(inputs.numpy())

        # save the model
        with open(create_parent_directories_if_needed(configuration.outputs), "wb") as model_file:
            pickle.dump(model, model_file)
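
A hedged sketch of the imports this snippet presumes; compose_with_mask and create_parent_directories_if_needed are project-local helpers, and IterativeImputer is still experimental in scikit-learn, so it must be enabled explicitly before import:

import pickle
import numpy as np
import torch
# The experimental flag must be imported before IterativeImputer itself.
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer
from sklearn.tree import ExtraTreeRegressor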
Example #10
    def __init__(self,
                 n_estimators=100,
                 max_samples=256,
                 max_features=1.,
                 bootstrap=True,
                 n_jobs=1,
                 random_state=None,
                 verbose=0):
        super(IsolationForest, self).__init__(
            base_estimator=ExtraTreeRegressor(
                max_depth=int(np.ceil(np.log2(max(max_samples, 2)))),
                max_features=1,
                splitter='random',
                random_state=random_state),
            # note: the tree's max_features above is unrelated to self.max_features
            bootstrap=bootstrap,
            bootstrap_features=False,
            n_estimators=n_estimators,
            max_samples=max_samples,
            max_features=max_features,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose)
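
The base-tree depth above is capped at ceil(log2(max_samples)), roughly the height needed to isolate every point of a subsample; with the default max_samples=256 that cap is 8. A quick check of the arithmetic:

import numpy as np

# Depth cap used by the constructor above for the default subsample size.
print(int(np.ceil(np.log2(max(256, 2)))))  # -> 8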
Example #11
def run(type):
    if type == 'decision_tree':
        from sklearn import tree
        model = tree.DecisionTreeRegressor()
    elif type == 'linear':
        from sklearn import linear_model
        model = linear_model.LinearRegression()
    elif type == 'svm':
        from sklearn import svm
        model = svm.SVR()
    elif type == 'KNN':
        from sklearn import neighbors
        model = neighbors.KNeighborsRegressor()
    elif type == 'random_forest':
        from sklearn import ensemble
        model = ensemble.RandomForestRegressor(n_estimators=20)
    elif type == 'adaboost':
        from sklearn import ensemble
        model = ensemble.AdaBoostRegressor(n_estimators=50)
    elif type == 'extra_tree':
        from sklearn.tree import ExtraTreeRegressor
        model = ExtraTreeRegressor()
    method(model, model)
Example #12
    def fit(self, trend, seasonality):

        self.fit = None  # note: this attribute assignment shadows the fit method on the instance
        self.trend = trend
        self.seasonality = seasonality

        self.lags = [1, seasonality]

        # try:
        xtree = ExtraTreeRegressor()
        self.feature_selection(model=xtree)

        param_grid = {
            'max_depth': [2, 4, 6, 8]
        }

        model = GridSearchCV(estimator=xtree, param_grid=param_grid)
        self.fit = model.fit(self.selected_X, self.y.to_numpy())

        # except:
        #  print('ERROR: Could not run ExtraTreeRegressor! Please check your input data. \n')

        return self.fit
Example #13
def get_model(model_name):
    model_dict = {
        # Regression
        "model_DecisionTreeRegressor":
        tree.DecisionTreeRegressor(),  # Decision tree
        "model_LinearRegression":
        linear_model.LinearRegression(),  # Linear regression
        "model_SVR":
        svm.SVR(),  # SVM
        "model_KNeighborsRegressor":
        neighbors.KNeighborsRegressor(),  # KNN
        "model_RandomForestRegressor":
        ensemble.RandomForestRegressor(n_estimators=20),  # Random forest with 20 trees
        "model_AdaBoostRegressor":
        ensemble.AdaBoostRegressor(n_estimators=50),  # AdaBoost with 50 trees
        "model_GradientBoostingRegressor":
        ensemble.GradientBoostingRegressor(
            n_estimators=100),  # GBRT with 100 trees
        "model_BaggingRegressor":
        BaggingRegressor(),  # Bagging regression
        "model_ExtraTreeRegressor":
        ExtraTreeRegressor(),  # ExtraTree (extremely randomized tree) regression
        # Classification
        "model_LogisticRegression_weight":
        LogisticRegression(C=1000, class_weight={
            0: 0.8,
            1: 0.2
        }),  # Logistic regression (class-weighted)
        "model_LogisticRegression":
        LogisticRegression(C=1000),  # Logistic regression (unweighted)
        "model_SVC":
        svm.SVC(class_weight="balanced"),  # Support vector machine
        "model_RandomForestClassifier":
        RandomForestClassifier(n_estimators=7, class_weight="balanced")  # Random forest
    }

    return model_dict[model_name]
Example #14
def GET_ORDINARY_MODELS(prediction_type=None):
    if prediction_type == "C":
        return {
            "LR": LogisticRegression(),
            "LDA": LinearDiscriminantAnalysis(),
            "GNB": GaussianNB(),
            "KNC": KNeighborsClassifier(),
            "SVC": SVC(),
            "ETC": ExtraTreeClassifier(),
            "DTC": DecisionTreeClassifier()
        }
    elif prediction_type == "R":
        return {
            "LR": LinearRegression(),
            "RIDGE": Ridge(),
            "LASSO": Lasso(),
            "EN": ElasticNet(),
            "KNR": KNeighborsRegressor(),
            "SVR": SVR(),
            "ETR": ExtraTreeRegressor(),
            "DTR": DecisionTreeRegressor()
        }
    else:
        raise ValueError("prediction_type must be 'C' or 'R'")
Example #15
    def __init__(self,
                 n_estimators=10,
                 max_depth=5,
                 min_samples_split=2,
                 min_samples_leaf=1,
                 min_weight_fraction_leaf=0.,
                 max_leaf_nodes=None,
                 sparse_output=True,
                 n_jobs=1,
                 random_state=None,
                 verbose=0,
                 warm_start=False,
                 use_one_hot=True):
        super(RandomTreesEmbeddingUnsupervised, self).__init__(
            base_estimator=ExtraTreeRegressor(),
            n_estimators=n_estimators,
            estimator_params=("criterion", "max_depth", "min_samples_split",
                              "min_samples_leaf", "min_weight_fraction_leaf",
                              "max_features", "max_leaf_nodes",
                              "random_state"),
            bootstrap=False,
            oob_score=False,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose,
            warm_start=warm_start)

        self.criterion = 'mse'
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.min_weight_fraction_leaf = min_weight_fraction_leaf
        self.max_features = 1
        self.max_leaf_nodes = max_leaf_nodes
        self.sparse_output = sparse_output
        self.use_one_hot = use_one_hot
Example #16
def get_linear_model(model_name):
    if model_name == "LinearRegression":
        reg = LinearRegression()
    elif model_name == "RandomForestRegressor":
        reg = RandomForestRegressor()
    elif model_name == "DecisionTreeRegressor":
        reg = DecisionTreeRegressor()
    elif model_name == "GaussianProcessRegressor":
        reg = GaussianProcessRegressor()
    elif model_name == "ExtraTreeRegressor":
        reg = ExtraTreeRegressor()
    elif model_name == "LGBMRegressor":
        reg = lgbm.sklearn.LGBMRegressor()
    elif model_name == "BaggingRegressor":
        reg = BaggingRegressor()
    elif model_name == "KNeighborsRegressor":
        reg = KNeighborsRegressor()
    elif model_name == "Lars":
        reg = Lars()
    elif model_name == "SVR":
        reg = SVR()
    elif model_name == "NuSVR":
        reg = NuSVR()
    else:
        raise ValueError("unknown model_name: " + model_name)
    return reg
Example #17
def _GET_ORDINARY_MODEL_DICT(_Type="classification"):
    if _Type == "classification":
        return {
            "LR" : LogisticRegression(),
            "LDA" : LinearDiscriminantAnalysis(),
            "GNB" : GaussianNB(),
            "KNC" : KNeighborsClassifier(),
            "DTC" : DecisionTreeClassifier(),
            "ETC" : ExtraTreeClassifier(),
            "SVC" : SVC()
            }
    elif _Type == "regression":
        return {
            "LR" : LinearRegression(),
            "RIDGE" : Ridge(),
            "LASSO" : Lasso(),
            "EN" : ElasticNet(),
            "KNR" : KNeighborsRegressor(),
            "DTR" : DecisionTreeRegressor(),
            "ETR" : ExtraTreeRegressor(),
            "SVR" : SVR()
            }
    else:
        raise ValueError("_Type must be 'classification' or 'regression'")
Example #18
	else:
		pipeline.verify(auto_X.sample(n = 3, random_state = 13))
	pipeline.configure(**pmml_options)
	store_pmml(pipeline, name)
	if isinstance(regressor, IsolationForest):
		decision_function = DataFrame(pipeline.decision_function(auto_X), columns = ["decisionFunction"])
		outlier = DataFrame(pipeline.predict(auto_X), columns = ["outlier"])
		outlier['outlier'] = outlier['outlier'].apply(lambda x: str(bool(x == -1)).lower())
		store_csv(pandas.concat((decision_function, outlier), axis = 1), name)
	else:
		mpg = DataFrame(pipeline.predict(auto_X), columns = ["mpg"])
		store_csv(mpg, name)

if "Auto" in datasets:
	build_auto(AdaBoostRegressor(n_estimators = 31, random_state = 13), "AdaBoostAuto")
	build_auto(DecisionTreeRegressor(random_state = 13), "DecisionTreeAuto", compact = False, flat = True)
	build_auto(GradientBoostingRegressor(n_estimators = 31, random_state = 13), "GradientBoostingAuto")
	build_auto(IsolationForest(n_estimators = 31, random_state = 13), "IsolationForestAuto")
	build_auto(LGBMRegressor(objective = "regression", n_estimators = 31, random_state = 13), "LightGBMAuto")
	build_auto(LinearRegression(), "LinearRegressionAuto")
	build_auto(RandomForestRegressor(n_estimators = 17, random_state = 13), "RandomForestAuto", compact = False, flat = False)
	build_auto(VotingRegressor(estimators = [("major", DecisionTreeRegressor(max_depth = 8, random_state = 13)), ("minor", ExtraTreeRegressor(max_depth = 5, random_state = 13))], weights = [0.7, 0.3]), "VotingEnsembleAuto")
	build_auto(XGBRegressor(objective = "reg:squarederror", n_estimators = 31, random_state = 13), "XGBoostAuto")

sparsify("Auto")

auto_X, auto_y = load_auto("AutoNA")

if ("Auto" in datasets) or ("AutoNA" in datasets):
	build_auto(LGBMRegressor(objective = "regression", n_estimators = 31, random_state = 13), "LightGBMAutoNA")
	build_auto(XGBRegressor(objective = "reg:squarederror", n_estimators = 31, random_state = 13), "XGBoostAutoNA")
Example #19
    # print(cols[11])
    # print(cols[0])
    X1 = cols[0:11]
    #X1 = preprocessing.normalize(X1)
    X = list(zip(*X1))
    Y = cols[11]

    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2, random_state=rn) 
    X_train = np.array(X_train)
    y_train = np.array(y_train)
    X_test = np.array(X_test)
    y_test = np.array(y_test)

    #print(y_test)

    model = ExtraTreeRegressor()  # note: named lin_reg_mod in the original, but this is a tree model

    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    #print(pred)
    #print(y_test)
    test_set_r2 = r2_score(y_test, pred)
    #print(test_set_r2)
    tr2 += test_set_r2
    
    #abs_er = mean_absolute_error(y_test, pred)
    #tabse+=abs_er

    temp = []
    for (i,j) in zip(y_test, pred):
        t = (abs(i-j))/float(i)
Example #20
from math import *
import pandas as pd
import numpy as np
from sklearn.tree import ExtraTreeRegressor
import matplotlib.pyplot as plt
import re, os

data = pd.read_csv('ice.csv')
x = data[['temp', 'street']]
y = data['ice']
clf = ExtraTreeRegressor()
clf.fit(x, y)
p = clf.predict(x)
print(clf.score(x, y))
t = np.arange(0.0, 31.0)
plt.plot(t, data['ice'], '--', t, p, '-')
plt.show()
Example #21
nca_ar = NeighborhoodComponentsAnalysis(random_state=0)
nca_val = NeighborhoodComponentsAnalysis(random_state=0)
nca_ar.fit(X_train, ar_train)
nca_val.fit(X_train, val_train)

X_train_ar = nca_ar.transform(X_train)
X_train_val = nca_val.transform(X_train)
X_test_ar = nca_ar.transform(X_test)
X_test_val = nca_val.transform(X_test)

# X_train_ar = X_train
# X_train_val = X_train
# X_test_ar = X_test
# X_test_val = X_test

parameters = {"n_estimators": [50, 75, 100], "learning_rate": [0.1, 0.5, 1.]}
reg_ar = AdaBoostRegressor(ExtraTreeRegressor(max_depth=5, random_state=0),
                           random_state=0)
reg_val = AdaBoostRegressor(ExtraTreeRegressor(max_depth=5, random_state=0),
                            random_state=0)

clf_ar = GridSearchCV(reg_ar, parameters)
clf_val = GridSearchCV(reg_val, parameters)
clf_ar.fit(X_train_ar, ar_train)
clf_val.fit(X_train_val, val_train)

print(clf_ar.score(X_train_ar, ar_train))
print(clf_val.score(X_train_val, val_train))

print("--------------Test-------------------")
print(clf_ar.score(X_test_ar, ar_test))
print(clf_val.score(X_test_val, val_test))
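
A small follow-up sketch (not in the original script) for inspecting which AdaBoost settings each grid search selected:

# GridSearchCV exposes the winning parameter combination after fitting.
print(clf_ar.best_params_)
print(clf_val.best_params_)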
Example #22
	def Build_MapMean_Model(self):
		MapMean = ExtraTreeRegressor()
		MapMean.fit(self.MapFeature_list, self.MapMean_list)
		self.Dump_Model('Model/MapMean.model', MapMean)
		print(MapMean.feature_importances_)
Example #23

model_RandomForestRegressor = ensemble.RandomForestRegressor(
    n_estimators=20)  # 20 trees
#### 3.6 AdaBoost regression ####
from sklearn import ensemble
model_AdaBoostRegressor = ensemble.AdaBoostRegressor(
    n_estimators=50)  # 50 trees
#### 3.7 GBRT regression ####
from sklearn import ensemble
model_GradientBoostingRegressor = ensemble.GradientBoostingRegressor(
    n_estimators=100)  # 100 trees
#### 3.8 Bagging regression ####
from sklearn.ensemble import BaggingRegressor
model_BaggingRegressor = BaggingRegressor()
#### 3.9 ExtraTree (extremely randomized tree) regression ####
from sklearn.tree import ExtraTreeRegressor
model_ExtraTreeRegressor = ExtraTreeRegressor()

# In[48]:


def getHeadFromFile():
    #paths=path[0:path.index(".")]
    #names=paths.split("-")
    #names=['date', 'sales_num']
    return names


def getDataFromFile(jsonstring):
    #df = pd.read_csv(path, sep='\t', low_memory=False)
    #new_df = df.replace('?', np.nan)
    #datas = new_df.dropna(how = 'any')
Example #24

    def extra_tree_regressor(self):
        x_train, x_test, y_train, y_test = self.preprocessing()
        model = ExtraTreeRegressor()
        y_pred = model.fit(x_train, y_train).predict(x_test)
        self.printing(y_test, y_pred, 'Extra Tree')
Example #25

    ri_MakingLT_labels_train,
    test_size=0.25,
    random_state=42)

###**Adaboost(ExtraDecisionTree)**###

# Train the **Adaboost(ExtraDecisionTree)** model
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import ExtraTreeRegressor
ada_et_tree_reg = AdaBoostRegressor(base_estimator=ExtraTreeRegressor(
    ccp_alpha=0.0,
    criterion='mse',
    max_depth=11,
    max_features='auto',
    max_leaf_nodes=None,
    min_impurity_decrease=0.0,
    min_impurity_split=None,
    min_samples_leaf=1,
    min_samples_split=2,
    min_weight_fraction_leaf=0.0,
    random_state=42,
    splitter='random'),
                                    learning_rate=0.1,
                                    loss='exponential',
                                    n_estimators=150,
                                    random_state=42)
ada_et_tree_reg.fit(ri_MakingLT_prepared_train, ri_MakingLT_labels_train)
ri_MakingLT_predicted = ada_et_tree_reg.predict(ri_MakingLT_prepared_test)

from sklearn.metrics import mean_squared_error
ada_et_tree_reg_mse = mean_squared_error(ri_MakingLT_labels_test,

Example #26

from sklearn.ensemble import RandomForestRegressor
rf_model = RandomForestRegressor(n_estimators=700, random_state=42)
rf_model.fit(x_train, y_train)
y_predict = rf_model.predict(x_test)
r2_score(y_test, y_predict.ravel())


# ### ExtraTreeRegressor

# In[85]:


from sklearn.tree import ExtraTreeRegressor
extratree_model = ExtraTreeRegressor(random_state=42)
extratree_model.fit(x_train, y_train)
y_predict = extratree_model.predict(x_test)
r2_score(y_test, y_predict.ravel())


# ### Result
# 
# So we can conclude that, of the models tried, RandomForestRegressor works best, with 90.66% accuracy, which is very good.

# In[86]:


# Save the model with pickle so it can be reused later
import pickle
pickle.dump(extratree_model, open('model.pkl', 'wb'))
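
A hedged sketch (not part of the original notebook) of loading the pickled model back and predicting:

import pickle

with open('model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)
print(loaded_model.predict(x_test[:5]))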
Example #27
    def fit(self,
            training_queries,
            training_query_weights=None,
            validation_queries=None,
            validation_query_weights=None):
        '''
        Train a LambdaRandomForest model on given training queries.
        Optionally, use validation queries for finding an optimal
        number of trees using early stopping.

        Parameters
        ----------
        training_queries : Queries instance
            The set of queries from which the model will be trained.

        training_query_weights : array of floats, shape = [n_queries],
                                 or None
            The weight given to each training query, which is used to
            measure its importance. Queries with 0.0 weight will never
            be used in training.

        validation_queries : Queries instance or None
            The set of queries used for early stopping.

        validation_query_weights : array of floats, shape = [n_queries]
                                   or None
            The weight given to each validation query, which is used to
            measure its importance. Queries with 0.0 weight will never
            be used in validation.

        Returns
        -------
        self : object
            Returns self.
        '''
        metric = MetricFactory(self.metric,
                               queries=aslist(training_queries,
                                              validation_queries),
                               random_state=self.random_state)

        # If the metric used for training is normalized, it is advantageous
        # to precompute the scaling factor for each query in advance.
        training_query_scales = metric.compute_scaling(
            training_queries, query_weights=training_query_weights)

        if validation_queries is None:
            validation_queries = training_queries
            validation_query_scales = training_query_scales.copy()
        else:
            validation_query_scales = metric.compute_scaling(
                validation_queries, query_weights=validation_query_weights)

        # The first row is reserved for the ranking scores computed
        # by the previous fold of regression trees; see below how the
        # tree ensemble is evaluated, which is why the +1.
        validation_ranking_scores = np.zeros(
            (self.n_jobs + 1, validation_queries.document_count()),
            dtype=np.float64)

        logger.info('Training of LambdaRandomForest model has started.')

        estimators = []

        if self.random_thresholds:
            for k in range(self.n_estimators):
                estimators.append(
                    ExtraTreeRegressor(
                        max_depth=self.max_depth,
                        max_leaf_nodes=self.max_leaf_nodes,
                        min_samples_split=self.min_samples_split,
                        min_samples_leaf=self.min_samples_leaf,
                        max_features=self.max_features,
                        random_state=self.random_state))
        else:
            for k in range(self.n_estimators):
                estimators.append(
                    DecisionTreeRegressor(
                        max_depth=self.max_depth,
                        max_leaf_nodes=self.max_leaf_nodes,
                        min_samples_split=self.min_samples_split,
                        min_samples_leaf=self.min_samples_leaf,
                        max_features=self.max_features,
                        random_state=self.random_state))

        # Best performance and index of the last tree.
        best_performance = -np.inf
        best_performance_k = -1

        # Counts how many trees have been trained since the last
        # improvement on the validation set.
        performance_not_improved = 0

        # Partition the training into a proper number of folds
        # to benefit from the parallelization at best.
        if self.n_estimators > self.n_jobs:
            estimator_indices = np.array_split(
                np.arange(self.n_estimators, dtype=np.intc),
                (self.n_estimators + self.n_jobs - 1) // self.n_jobs)
        else:
            estimator_indices = [np.arange(self.n_estimators)]

        for fold_indices in estimator_indices:
            # Train all trees in the current fold...
            fold_estimators = \
                Parallel(n_jobs=self.n_jobs, backend='threading')(
                    delayed(parallel_build_trees, check_pickle=False)(
                        i, estimators[i], self.n_estimators, metric.copy(),
                        training_queries, training_query_scales,
                        training_query_weights, self.use_newton_method,
                        self.bootstrap, self.subsample_queries,
                        self.subsample_documents,
                        self.random_state.randint(1, np.iinfo('i').max),
                        self.sigma, validation_queries,
                        validation_ranking_scores[i - fold_indices[0] + 1])
                    for i in fold_indices)

            self.estimators.extend(fold_estimators)

            # Compute the ranking score of validation queries for every
            # new tree that has been just trained.
            np.cumsum(validation_ranking_scores[:(len(fold_indices) + 1)],
                      out=validation_ranking_scores[:(len(fold_indices) + 1)],
                      axis=0)

            for i, ranking_scores in enumerate(
                    validation_ranking_scores[1:, :]):
                # Get the performance of the current model consisting
                # of `fold_indices[0] + i + 1` number of trees.
                validation_performance = metric.evaluate_queries(
                    validation_queries,
                    ranking_scores,
                    scales=validation_query_scales)

                logger.info(
                    '#%08d: %s (%s): %11.8f' %
                    (fold_indices[i], 'training' if
                     validation_queries is training_queries else 'validation',
                     metric, validation_performance))

                if validation_performance > best_performance:
                    best_performance = validation_performance
                    best_performance_k = fold_indices[i]
                    performance_not_improved = 0
                else:
                    performance_not_improved += 1

                if (performance_not_improved >= self.estopping
                        and self.min_n_estimators <= fold_indices[i] + 1):
                    break

            if (performance_not_improved >= self.estopping
                    and self.min_n_estimators <= fold_indices[i] + 1):
                logger.info(
                    'Stopping early since no improvement on %s '
                    'queries has been observed for %d iterations '
                    '(since iteration %d)' %
                    ('training' if validation_queries is training_queries else
                     'validation', self.estopping, best_performance_k + 1))
                break

            # Copy the last ranking scores for the next validation "fold".
            validation_ranking_scores[0, :] = validation_ranking_scores[
                len(fold_indices), :]

        if validation_queries is not training_queries:
            logger.info('Final model performance (%s) on validation queries: '
                        '%11.8f' % (metric, best_performance))
        else:
            logger.info('Final model performance (%s) on training queries: '
                        '%11.8f' % (metric, best_performance))

        # Make sure the model has the wanted size.
        best_performance_k = max(best_performance_k, self.min_n_estimators - 1)

        # Leave the estimators that led to the best performance,
        # either on training or validation set.
        del self.estimators[best_performance_k + 1:]

        # Correct the number of trees.
        self.n_estimators = len(self.estimators)

        self.best_performance = best_performance

        logger.info('Training of LambdaRandomForest model has finished.')

        return self
Example #28
def predict_extra_tree(train_X, train_Y, test, param=30):
    # note: scikit-learn requires min_samples_split >= 2
    clf = ExtraTreeRegressor(min_samples_leaf=param, min_samples_split=2,
                             criterion='mse')
    clf.fit(train_X, train_Y)
    preds = clf.predict(test)
    return preds
Example #29

def build_lonely_tree_regressor(X, y, max_features, max_depth, min_samples_split):
	clf = ExtraTreeRegressor(max_features=max_features, max_depth=max_depth, min_samples_split=min_samples_split)
	clf = clf.fit(X, y)
	return clf
Example #30
def doregress(X_train, y_train, n_train, X_test, y_test, n_test, band, fnames):
    lin  = LinearRegression()
    lin.fit(X_train, y_train)
    lres  = lin.predict(X_test) - y_test
    zl, ml, sl, fl = summstats(z_test, lres, n_test)
    
    #gbr1 = GradientBoostingRegressor(loss="ls")
    #gbr2 = GradientBoostingRegressor(loss="lad")
    #gbr1.fit(X_train, y_train)
    #gbr2.fit(X_train, y_train)
    #g1res = gbr1.predict(X_test) - y_test
    #g2res = gbr2.predict(X_test) - y_test
    #g1z, g1med, g1std = summstats(z_test, g1res)
    #g2z, g2med, g2std = summstats(z_test, g2res)
    
    #ada   = AdaBoostRegressor()
    #ada.fit(X_train, y_train)
    #ares  = ada.predict(X_test) - y_test
    #az, amed, astd = summstats(z_test, ares)

    # Some of these appear to be unstable
    # I.e. feature importance changes
    #for extension in ("A", "B", "C", "D", "E"):

    for extension in ("A",):
        print "# Regressing", extension

        xtr   = ExtraTreeRegressor()
        xtr.fit(X_train, y_train)
        zx, mx, sx, fx = doplot(xtr, X_test, y_test, z_test, n_test, fnames, "%s-band ExtraTreeRegressor"%(band), "R_%s_%s_ext.png"%(band, extension))

        xtrw  = ExtraTreeRegressor()
        xtrw.fit(X_train, y_train, sample_weight=np.log10(n_train))
        zxw, mxw, sxw, fxw = doplot(xtrw, X_test, y_test, z_test, n_test, fnames, "%s-band weighted ExtraTreeRegressor"%(band), "R_%s_%s_ext_weight.png"%(band, extension))

        ####

        tree = DecisionTreeRegressor()
        tree.fit(X_train, y_train)
        zt, mt, st, ft = doplot(tree, X_test, y_test, z_test, n_test, fnames, "%s-band DecisionTreeRegressor"%(band), "R_%s_%s_tree.png"%(band, extension))

        treew = DecisionTreeRegressor()
        treew.fit(X_train, y_train, sample_weight=np.log10(n_train))
        ztw, mtw, stw, ftw = doplot(treew, X_test, y_test, z_test, n_test, fnames, "%s-band weighted DecisionTreeRegressor"%(band), "R_%s_%s_tree_weight.png"%(band, extension))

        ####
        weights = n_train
        nt      = 50

        rfr  = RandomForestRegressor(n_estimators=nt)
        rfr.fit(X_train, y_train)
        zr, mr, sr, fr = doplot(rfr, X_test, y_test, z_test, n_test, fnames, "%s-band RandomForestRegressor"%(band), "R_%s_%s_%d_rfr.png"%(band, extension, nt))
                
        rfrw  = RandomForestRegressor(n_estimators=nt)
        rfrw.fit(X_train, y_train, sample_weight=weights)
        zrw, mrw, srw, frw = doplot(rfrw, X_test, y_test, z_test, n_test, fnames, "%s-band weighted RandomForestRegressor"%(band), "R_%s_%s_%d_rfr_weight.png"%(band, extension, nt))
        print "RF %d : %.5e +/- %.5e vs weighted %.5e +/- %.5e" % (nt, 
                                                                      np.median(fr), 0.741 * (np.percentile(fr, 75) - np.percentile(fr, 25)),
                                                                      np.median(frw), 0.741 * (np.percentile(frw, 75) - np.percentile(frw, 25)))
        
        ####

        # Compare all models
        fig, (sp1, sp2, sp3) = plt.subplots(3, 1, sharex=True, figsize=(16,12))
        sp1.plot(zl, ml, "r-", label="LinearRegression")
        sp1.plot(zt, mt, "b-", label="DecisionTreeRegressor")
        sp1.plot(zr, mr, "g-", label="RandomForestRegressor")
        sp1.plot(zx, mx, "m-", label="ExtraTreeRegressor")

        sp2.plot(zl[np.where(sl>0.)], sl[np.where(sl>0.)], "r-")
        sp2.plot(zt[np.where(st>0.)], st[np.where(st>0.)], "b-")
        sp2.plot(zr[np.where(sr>0.)], sr[np.where(sr>0.)], "g-")
        sp2.plot(zx[np.where(sx>0.)], sx[np.where(sx>0.)], "m-")
        ymin, ymax = sp2.get_ylim()
        sp2.set_ylim(max(1e-7,ymin), 1e-1)

        sp3.plot(zl[np.where(fl>0.)], fl[np.where(fl>0.)], "r-")
        sp3.plot(zt[np.where(ft>0.)], ft[np.where(ft>0.)], "b-")
        sp3.plot(zr[np.where(fr>0.)], fr[np.where(fr>0.)], "g-")
        sp3.plot(zx[np.where(fx>0.)], fx[np.where(fx>0.)], "m-")
        ymin, ymax = sp3.get_ylim()
        sp3.set_ylim(max(1e-7,ymin), 1.1)

        sp1.legend(loc=2, fancybox=True)
        sp1.set_title("Mean refraction residual (arcsec)", weight="bold")
        sp2.set_ylabel("RMS residual (arcsec)", weight="bold")
        sp3.set_ylabel("f_tot with dR>%.3f"%(dcrLevel), weight="bold")
        sp3.set_xlabel("Zenith distance (deg)", weight="bold")
        sp1.axhline(y=0, c='k', linestyle='--', alpha=0.5)
        sp2.axhline(y=dcrLevel, c='k', linestyle='--', alpha=0.5)
        sp3.axhline(y=0.01, c='k', linestyle='--', alpha=0.5)
        sp2.semilogy()
        sp3.semilogy()
        plt.savefig("R_%s_%s.png" % (band, extension))

        ###

        fig, (sp1, sp2, sp3) = plt.subplots(3, 1, sharex=True, figsize=(16,12))
        sp1.plot(zl,  ml,  "r-", label="LinearRegression")
        sp1.plot(ztw, mtw, "b-", label="DecisionTreeRegressor weighted")
        sp1.plot(zrw, mrw, "g-", label="RandomForestRegressor weighted")
        sp1.plot(zxw, mxw, "m-", label="ExtraTreeRegressor weighted")

        sp2.plot(zl[np.where(sl>0.)],  sl[np.where(sl>0.)],  "r-")
        sp2.plot(ztw[np.where(stw>0.)], stw[np.where(stw>0.)], "b-")
        sp2.plot(zrw[np.where(srw>0.)], srw[np.where(srw>0.)], "g-")
        sp2.plot(zxw[np.where(sxw>0.)], sxw[np.where(sxw>0.)], "m-")
        ymin, ymax = sp2.get_ylim()
        sp2.set_ylim(max(1e-7,ymin), 1e-1)

        sp3.plot(zl[np.where(fl>0.)],  fl[np.where(fl>0.)],  "r-")
        sp3.plot(ztw[np.where(ftw>0.)], ftw[np.where(ftw>0.)], "b-")
        sp3.plot(zrw[np.where(frw>0.)], frw[np.where(frw>0.)], "g-")
        sp3.plot(zxw[np.where(fxw>0.)], fxw[np.where(fxw>0.)], "m-")
        ymin, ymax = sp3.get_ylim()
        sp3.set_ylim(max(1e-7,ymin), 1.1)

        sp1.legend(loc=2, fancybox=True)
        sp1.set_title("Mean refraction residual (arcsec)", weight="bold")
        sp2.set_ylabel("RMS residual (arcsec)", weight="bold")
        sp3.set_ylabel("f_tot with dR>%.3f"%(dcrLevel), weight="bold")
        sp3.set_xlabel("Zenith distance (deg)", weight="bold")
        sp1.axhline(y=0, c='k', linestyle='--', alpha=0.5)
        sp2.axhline(y=dcrLevel, c='k', linestyle='--', alpha=0.5)
        sp3.axhline(y=0.01, c='k', linestyle='--', alpha=0.5)
        sp2.semilogy()
        sp3.semilogy()
        plt.savefig("R_%s_%s_weight.png" % (band, extension))
Example #31
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_X = scaler.fit_transform(X)

new_X = pd.DataFrame(scaled_X, columns=X.columns)
new_X.head()

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(new_X,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=42)

#check r2 score accuracy for Train data
from sklearn.tree import ExtraTreeRegressor
model = ExtraTreeRegressor()
model.fit(X_train, y_train)
print(model.score(X_train, y_train))

#check r2 score accuracy for Test data
# note: this refits a fresh model on the test split, as in the original;
# scoring the train-fitted model via model.score(X_test, y_test) would be sounder
from sklearn.tree import ExtraTreeRegressor
model = ExtraTreeRegressor()
model.fit(X_test, y_test)
print(model.score(X_test, y_test))

print(model.feature_importances_)
imp_feat = pd.Series(model.feature_importances_, index=X.columns)
imp_feat.nlargest(5).plot(kind='barh')
plt.show()

from sklearn.linear_model import LinearRegression
Example #32

    'mse_train', 'mse_test', 'mae_train', 'mae_test', 'mdae_train', 'mdae_test'
]
reg = [
    linear_model.LinearRegression(),
    linear_model.Ridge(max_iter=800),
    linear_model.RidgeCV(),
    linear_model.Lasso(max_iter=800),
    linear_model.LassoLarsCV(max_iter=800),
    linear_model.RANSACRegressor(),
    linear_model.BayesianRidge(),
    linear_model.ARDRegression(),
    linear_model.HuberRegressor(max_iter=800),
    linear_model.TheilSenRegressor(max_iter=800),
    PLSRegression(),
    DecisionTreeRegressor(),
    ExtraTreeRegressor(),
    BaggingRegressor(),
    AdaBoostRegressor(),
    GradientBoostingRegressor(),
    RandomForestRegressor(),
    linear_model.PassiveAggressiveRegressor(max_iter=800, tol=.001),
    linear_model.ElasticNet(max_iter=800),
    linear_model.SGDRegressor(max_iter=800, tol=.001),
    svm.SVR(),
    KNeighborsRegressor(),
    RadiusNeighborsRegressor(radius=1.5),
    GaussianProcessRegressor()
]

listreg = [
    'LinearRegression', 'Ridge', 'RidgeCV', 'Lasso', 'LassoLarsCV',
Example #33
from sklearn import ensemble
model_adaboost_regressor = ensemble.AdaBoostRegressor(
    n_estimators=50)  # 50 trees

# 7. GBRT regression
from sklearn import ensemble
model_gradient_boosting_regressor = ensemble.GradientBoostingRegressor(
    n_estimators=100)  # 100 trees

# 8. Bagging regression
from sklearn import ensemble
model_bagging_regressor = ensemble.BaggingRegressor()

# 9. ExtraTree (extremely randomized tree) regression
from sklearn.tree import ExtraTreeRegressor
model_extra_tree_regressor = ExtraTreeRegressor()

# 10. Polynomial regression
model_Polynomial = make_pipeline(PolynomialFeatures(3), Ridge())

# 11. Gaussian processes
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
kernel = DotProduct() + WhiteKernel()
model_GaussianProcessRegressor = GaussianProcessRegressor(kernel=kernel,
                                                          random_state=0)


# Regression section
def try_different_method(model):
    model.fit(x_train, y_train)
Example #34
	def Build_MapMean_Model(self):
		knn_MapMean = ExtraTreeRegressor()

		knn_MapMean.fit(self.MapFeature_list, self.MapMean_list)
		print(knn_MapMean.feature_importances_)
		self.Dump_Model('Model/MapMean.model', knn_MapMean)
Example #35
    def __init__(self, **kwargs):
        super().__init__(ExtraTreeRegressor(**kwargs))
Example #36

    pass


def fselection_bfs():
    pass


def fselection_add_del():
    pass


if __name__ == '__main__':
    X, y = load_boston(return_X_y=True)
    X = X[:20, :]
    y = y[:20]
    alg = ExtraTreeRegressor()
    cv = RepeatedKFold(n_splits=5, n_repeats=10, random_state=42)
    n = X.shape[1]

    int_scores = {}
    ext_scores = {}

    for i in range(1, n + 1):
        int_score_tmp1 = inf
        ext_score_tmp1 = inf
        for features in combinations(range(n), i):
            X_cuted = X[:, features]
            int_score_tmp2 = inf
            ext_score_tmp2 = inf
            for train_index, test_index in cv.split(X_cuted):
                X_train, X_test = X_cuted[train_index], X_cuted[test_index]
Example #37
model_maps['random_forest'] = ensemble.RandomForestRegressor(
    n_estimators=10)  # 10 trees (the original comment said 20)
#### 3.6 AdaBoost regression ####
from sklearn import ensemble
model_maps['adaboost'] = ensemble.AdaBoostRegressor(
    n_estimators=50)  # 50 trees
#### 3.7 GBRT regression ####
from sklearn import ensemble
model_maps['gradient_boosting'] = ensemble.GradientBoostingRegressor(
    n_estimators=100)  # 100 trees
#### 3.8 Bagging regression ####
from sklearn.ensemble import BaggingRegressor
model_maps['bagging'] = BaggingRegressor()
#### 3.9 ExtraTree (extremely randomized tree) regression ####
from sklearn.tree import ExtraTreeRegressor
model_maps['extra_tree'] = ExtraTreeRegressor()


def get_score(data_path, model_name):
    data = np.memmap(data_path, dtype='float64', mode='r')
    data = np.array(data.reshape((int(len(data) / 46), 46)))
    bad_row = []
    for i in range(data.shape[0]):
        if data[i, 0] == 0:
            bad_row.append(i)
    data = np.delete(data, bad_row, axis=0)
    X = data[:, 2:-1]
    Y = data[:, -1].reshape(-1, 1)
    split_at = int(X.shape[0] * 0.8)
    X_tr = X[:split_at, :]
    X_te = X[split_at:, :]
Example #38
	def Build_MapMean_Model(self):
		MapMean_Model = ExtraTreeRegressor()

		MapMean_Model.fit(self.MapFeature_list, self.MapMean_list)
		self.Dump_Model('Model/MapMean.model', MapMean_Model)