def ExtraTreeGS(X_train, X_test, y_train, y_test):
    reg = ExtraTreeRegressor()
    # Note: scikit-learn >= 1.0 renamed "mse"/"mae" to "squared_error"/"absolute_error"
    grid_values = {
        'criterion': ["squared_error", "absolute_error"],
        'max_depth': list(range(20, 25)),
    }
    grid_reg = GridSearchCV(
        reg,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg.fit(X_train, y_train)

    reg = grid_reg.best_estimator_
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params: dict = grid_reg.best_params_
    saveBestParams(nameOfModel="ExtraTreeGS", best_params=best_params)
    logSave(nameOfModel="ExtraTreeGS", reg=reg, metrics=metrics, val_metrics=val_metrics)
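# A minimal, self-contained sketch of the grid-search pattern above, assuming only
# scikit-learn; the synthetic data and the small parameter grid are illustrative
# stand-ins, and the project helpers (printMetrics, getMetrics, logSave) are omitted.
from sklearn.datasets import make_regression
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import ExtraTreeRegressor

X, y = make_regression(n_samples=200, n_features=8, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

grid = GridSearchCV(ExtraTreeRegressor(random_state=0),
                    param_grid={'max_depth': [5, 10, None]},
                    scoring='r2', cv=3)
grid.fit(X_train, y_train)
print(grid.best_params_, grid.best_estimator_.score(X_test, y_test))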
def fit(x, y):
    # clf = DecisionTreeRegressor(
    clf = ExtraTreeRegressor(
        max_depth=10,
        max_features=100,
        min_impurity_decrease=0.000001)
    clf.fit(x, y)
    return clf
def get_regressor(training_set):
    """Estimation of the value function using a regression algorithm.

    The training set contains tuples of (state, score); V: S -> R.
    """
    clf = ExtraTreeRegressor()
    clf.fit(*zip(*training_set))
    return clf
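# Toy usage of get_regressor, assuming states are fixed-length feature vectors;
# the (state, score) tuples below are illustrative only.
from sklearn.tree import ExtraTreeRegressor

training_set = [((0.0, 1.0), 0.5), ((1.0, 0.0), 0.9), ((1.0, 1.0), 0.2)]
states, scores = zip(*training_set)  # zip(*...) splits the tuples into (X, y) for fit
V = ExtraTreeRegressor().fit(states, scores)
print(V.predict([(0.0, 1.0)]))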
def ExtraTreeRegressorPrediction(train_X, train_y, test_X, valid_X, valid_y):
    etr = ExtraTreeRegressor()
    etr.fit(train_X, train_y)
    result = etr.predict(test_X)
    valid_ypred = etr.predict(valid_X)
    valid_mape = mape_loss(valid_y, valid_ypred)
    print('the MAPE score of ExtraTreeRegressor on the validation set is:', valid_mape)
    return result
def build_lonely_tree_regressor(X, y, max_features, max_depth, min_samples_split):
    clf = ExtraTreeRegressor(max_features=max_features,
                             max_depth=max_depth,
                             min_samples_split=min_samples_split)
    clf = clf.fit(X, y)
    return clf
def test_extra_tree_reg():
    X, y = load_iris(return_X_y=True)
    X_ = X.tolist()
    for y_ in [(y == 0).astype(int), (y == 2).astype(int)]:
        for max_depth in [5, 10, None]:
            clf = ExtraTreeRegressor(max_depth=max_depth, random_state=5)
            clf.fit(X, y_)
            clf_ = convert_estimator(clf)

            for method in ["predict"]:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    scores = getattr(clf, method)(X)
                    scores_ = getattr(clf_, method)(X_)
                assert np.allclose(scores.shape, shape(scores_))
                assert np.allclose(scores, scores_, equal_nan=True)
def ExtraTree(X_train, X_test, y_train, y_test):
    reg = ExtraTreeRegressor()
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="ExtraTree", reg=reg, metrics=metrics, val_metrics=val_metrics)
def fit(self, X, y=None, **fit_params):
    """Fit estimator to X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data, where `n_samples` is the number of samples
        and `n_features` is the number of features.
    y : None, default=None
        Not used in the fitting process but kept for compatibility.
    fit_params : dict, optional
        Optional extra fit parameters.

    Returns
    -------
    self : estimator
        Returns the instance itself.
    """
    # Just make y a random gaussian variable
    X = check_array(X)
    rng = check_random_state(self.random_state)
    y_rand = rng.randn(X.shape[0])

    tree_est = ExtraTreeRegressor(
        min_samples_leaf=self.min_samples_leaf,
        max_leaf_nodes=self.max_leaf_nodes,
        max_features=1,  # Completely random tree
        splitter='random',
        random_state=rng,
    )
    tree_est.fit(X, y_rand, **fit_params)
    self.tree_ = tree_est.tree_
    return self
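# Standalone sketch of the completely-random-tree trick used in fit() above
# (the same idea RandomTreesEmbedding relies on): regressing on Gaussian noise
# with max_features=1 and splitter='random' yields a data-independent partition
# of the feature space. The toy data below is an illustrative assumption.
import numpy as np
from sklearn.tree import ExtraTreeRegressor

rng = np.random.RandomState(0)
X = rng.rand(100, 4)
tree = ExtraTreeRegressor(max_features=1, splitter='random', random_state=rng)
tree.fit(X, rng.randn(X.shape[0]))
print(tree.apply(X[:5]))  # leaf index per sample, i.e. a random partition of X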
ri_MakingLT_prepared_train, ri_MakingLT_prepared_test, ri_MakingLT_labels_train, ri_MakingLT_labels_test = train_test_split(
    ri_MakingLT_prepared, ri_MakingLT_labels, test_size=0.20, random_state=42)

# Split the training data further into a reduced training set and a validation set
ri_MakingLT_prepared_train_re, ri_MakingLT_prepared_train_val, ri_MakingLT_labels_train_re, ri_MakingLT_labels_train_val = train_test_split(
    ri_MakingLT_prepared_train, ri_MakingLT_labels_train, test_size=0.25, random_state=42)

### ExtraTreeRegressor ###
# Train the ExtraTreeRegressor model (note: sklearn.tree.ExtraTreeRegressor is a
# single tree, not the ExtraTreesRegressor ensemble from sklearn.ensemble)
from sklearn.tree import ExtraTreeRegressor

Et_tree_reg = ExtraTreeRegressor(max_depth=11, random_state=42)
Et_tree_reg.fit(ri_MakingLT_prepared_train, ri_MakingLT_labels_train)

ri_MakingLT_predicted = Et_tree_reg.predict(ri_MakingLT_prepared_test)

from sklearn.metrics import mean_squared_error
Et_tree_reg_mse = mean_squared_error(ri_MakingLT_labels_test, ri_MakingLT_predicted)
Et_tree_reg_rmse = np.sqrt(Et_tree_reg_mse)
print(Et_tree_reg_rmse)

from sklearn.metrics import mean_absolute_error
Et_tree_reg_mae = mean_absolute_error(ri_MakingLT_labels_test, ri_MakingLT_predicted)
print(Et_tree_reg_mae)

Et_tree_reg_mape = (np.abs((ri_MakingLT_predicted - ri_MakingLT_labels_test)
                           / ri_MakingLT_labels_test).mean(axis=0))
scaler = StandardScaler()
scaled_X = scaler.fit_transform(X)
new_X = pd.DataFrame(scaled_X, columns=X.columns)
new_X.head()

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(new_X, y, test_size=0.33, random_state=42)

# check r2 score on the train data
from sklearn.tree import ExtraTreeRegressor
model = ExtraTreeRegressor()
model.fit(X_train, y_train)
print(model.score(X_train, y_train))

# check r2 score on the test data: score the held-out test set with the model
# fitted on the train set (refitting on the test set would leak the test labels)
print(model.score(X_test, y_test))

print(model.feature_importances_)
imp_feat = pd.Series(model.feature_importances_, index=X.columns)
imp_feat.nlargest(5).plot(kind='barh')
plt.show()

from sklearn.linear_model import LinearRegression
lm = LinearRegression()
from math import *
import pandas as pd
import numpy as np
from sklearn.tree import ExtraTreeRegressor
import matplotlib.pyplot as plt
import re, os

data = pd.read_csv('ice.csv')
x = data[['temp', 'street']]
y = data['ice']

clf = ExtraTreeRegressor()
clf.fit(x, y)
p = clf.predict(x)
print(clf.score(x, y))

t = np.arange(0.0, 31.0)
plt.plot(t, data['ice'], '--', t, p, '-')
plt.show()
def Build_MapMean_Model(self):
    knn_MapMean = ExtraTreeRegressor()
    knn_MapMean.fit(self.MapFeature_list, self.MapMean_list)
    print(knn_MapMean.feature_importances_)
    self.Dump_Model('Model/MapMean.model', knn_MapMean)
from pandas import read_csv
from sklearn.tree import ExtraTreeRegressor

# load data
dataframe = read_csv('useformodel.csv')
array = dataframe.values
X = array[:, 0:26]
Y = array[:, 26]

# feature extraction
model = ExtraTreeRegressor(random_state=0)
model.fit(X, Y)
print(model.feature_importances_)
dt.score(X_train, y_train)

# <a id="79"></a> <br>
# ## 7-9 ExtraTreeRegressor

# In[ ]:

from sklearn.tree import ExtraTreeRegressor
dtr = ExtraTreeRegressor()

# In[ ]:

# Fit model
dtr.fit(X_train, y_train)

# In[ ]:

# Score model on the training data
dtr.score(X_train, y_train)

# -----------------
# <a id="8"></a> <br>
# ## 8- Conclusion
# This kernel is not complete yet. I will keep extending it to cover the whole ML
# process with a variety of Python packages; since some problems remain, I would
# appreciate your feedback to improve it.
# You can follow me on:
# <br>
# > ###### [ GitHub](https://github.com/mjbahmani)
# <br>
# In[848]:

ETR = ExtraTreeRegressor()

# In[849]:

ETR

# In[856]:

ETR.fit(x, y)

# In[857]:

ETR_prediction = ETR.predict(x_test)
plt.plot(ETR_prediction[0], label='prediction')
plt.plot(y_test.iloc[0], label='real')

# In[858]:

print('mean_absolute_error', mean_absolute_error(y_test, ETR_prediction))
print('mean_squared_error', mean_squared_error(y_test, ETR_prediction))
mdae_t = []
evs_t = []
r2_t = []
for tr_i, ts_i in rkf.split(data):
    print(i, j, k, c)
    train, test = data.iloc[tr_i], data.iloc[ts_i]
    train_x = train.drop(columns=['Rainfall'])
    train_y = train['Rainfall']
    test_x = test.drop(columns=['Rainfall'])
    test_y = test['Rainfall']

    # "mse" was renamed to "squared_error" in scikit-learn 1.0
    model = ExtraTreeRegressor(criterion='squared_error',
                               splitter='best',
                               max_depth=i,
                               min_samples_leaf=j,
                               min_samples_split=k)
    model.fit(train_x, train_y)
    ts_p = model.predict(test_x)

    mse_t.append(mse(test_y, ts_p))
    rmse_t.append(rmse(test_y, ts_p))
    mae_t.append(mae(test_y, ts_p))
    mdae_t.append(mdae(test_y, ts_p))
    evs_t.append(evs(test_y, ts_p))
    r2_t.append(r2(test_y, ts_p))
    c += 1

dep_f.append(i)
saml_f.append(j)
sams_f.append(k)
mse_f.append(np.mean(mse_t))
rmse_f.append(np.mean(rmse_t))
mae_f.append(np.mean(mae_t))
mdae_f.append(np.mean(mdae_t))
sc = StandardScaler()
df.iloc[:, :] = sc.fit_transform(df.iloc[:, :])

# Feature Selection
# Univariate selection
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression, chi2

best_features = SelectKBest(score_func=f_regression, k='all')
best_features.fit(df.iloc[:, 1:], df.iloc[:, 0])
feature_scores = pd.DataFrame(best_features.scores_, index=df.iloc[:, 1:].columns)
feature_scores.plot(kind='barh')

# Tree-based feature selection
from sklearn.tree import ExtraTreeRegressor

regressor = ExtraTreeRegressor()
regressor.fit(df.iloc[:, 1:], df.iloc[:, 0])
importance_score = pd.Series(regressor.feature_importances_, index=df.iloc[:, 1:].columns)
importance_score.plot(kind='barh')

# Segregating feature & target columns
x = df.iloc[:, 1:]
y = df.iloc[:, 0]

# Modelling
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)

# Ridge Regression
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score
class ExtraTreeClass:
    """
    Name : ExtraTreeRegressor
    Attribute : None
    Method : predict, predict_by_cv, save_model
    """

    def __init__(self):
        # Algorithm name
        self._name = 'extratree'
        # Base path
        self._f_path = os.path.abspath(
            os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir))
        # Suppress warning messages
        warnings.filterwarnings('ignore')
        # Load the raw data
        data = pd.read_csv(self._f_path + "/regression/resource/regression_sample.csv",
                           sep=",", encoding="utf-8")
        # Masks for splitting training and test data
        self._x = (data["year"] <= 2017)
        self._y = (data["year"] >= 2018)
        # Training data
        self._x_train, self._y_train = self.preprocessing(data[self._x])
        # Test data
        self._x_test, self._y_test = self.preprocessing(data[self._y])
        # Declare the model
        self._model = ExtraTreeRegressor()
        # Train the model
        self._model.fit(self._x_train, self._y_train)

    # Data preprocessing
    def preprocessing(self, data):
        # Features
        x = []
        # Labels
        y = []
        # Window size (7 days)
        base_interval = 7
        # Temperatures
        temps = list(data["temperature"])
        for i in range(len(temps)):
            if i < base_interval:
                continue
            y.append(temps[i])
            xa = []
            for p in range(base_interval):
                d = i + p - base_interval
                xa.append(temps[d])
            x.append(xa)
        return x, y

    # Plain prediction
    def predict(self, save_img=False, show_chart=False):
        # Predict
        y_pred = self._model.predict(self._x_test)
        # Score
        score = r2_score(self._y_test, y_pred)
        # Report linear-model attributes, if present
        if hasattr(self._model, 'coef_') and hasattr(self._model, 'intercept_'):
            print(f'Coef = {self._model.coef_}')
            print(f'intercept = {self._model.intercept_}')
        print(f'Score = {score}')
        # Optionally save the chart image
        if save_img:
            self.save_chart_image(y_pred, show_chart)
        # Predictions & score
        return [list(y_pred), score]

    # Cross-validation prediction
    def predict_by_cv(self):
        # For regression, implement cross validation to fit the actual project
        return False

    # GridSearchCV prediction
    def predict_by_gs(self):
        pass

    # Save or refresh the model
    def save_model(self, renew=False):
        if not renew:
            # First save
            joblib.dump(self._model, self._f_path + f'/model/{self._name}_rg.pkl')
        else:
            # Replace the existing model, archiving the old file
            if os.path.isfile(self._f_path + f'/model/{self._name}_rg.pkl'):
                os.rename(
                    self._f_path + f'/model/{self._name}_rg.pkl',
                    self._f_path + f'/model/{str(self._name) + str(time.time())}_rg.pkl')
            joblib.dump(self._model, self._f_path + f'/model/{self._name}_rg.pkl')

    # Save the regression chart
    def save_chart_image(self, data, show_chart):
        # Figure size
        plt.figure(figsize=(15, 10), dpi=100)
        # Labels
        plt.plot(self._y_test, c='r')
        # Predictions
        plt.plot(data, c='b')
        # Save as an image
        plt.savefig('./chart_images/tenki-kion-lr.png')
        # Show the chart (optional)
        if show_chart:
            plt.show()

    def __del__(self):
        del self._x_train, self._x_test, self._y_train, self._y_test, self._x, self._y, self._model
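# Toy run of the 7-day sliding-window logic in preprocessing(): each sample is
# the previous 7 temperatures and the label is the current one. The temperature
# list here is an illustrative assumption.
temps = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
base_interval = 7
x, y = [], []
for i in range(len(temps)):
    if i < base_interval:
        continue
    y.append(temps[i])
    x.append([temps[i + p - base_interval] for p in range(base_interval)])
print(x[0], y[0])  # [1, 2, 3, 4, 5, 6, 7] 8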
# splitting datasets
X = df.iloc[:, :-1]
y = df.iloc[:, -1].values
y = y.reshape(-1, 1)

# handling missing values (0 in pm2.5)
from sklearn.impute import SimpleImputer
im = SimpleImputer(missing_values=0, strategy='mean')
im = im.fit(y)
y = im.transform(y)

# feature selection
from sklearn.tree import ExtraTreeRegressor
model = ExtraTreeRegressor()
model.fit(X, y.ravel())  # ravel to a 1-D target to avoid the column-vector warning
print(model.feature_importances_)
feat_imp = pd.Series(model.feature_importances_, index=X.columns)
feat_imp.nlargest(5).plot(kind='barh')  # pick the 5 columns most correlated with pm2.5
plt.show()

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

from sklearn.ensemble import RandomForestRegressor
reg = RandomForestRegressor()
reg.fit(X_train, y_train.ravel())
y_pred = reg.predict(X_test)
print('r^2 value of train set:', reg.score(X_train, y_train))
def predict_extra_tree(train_X, train_Y, test, param=30):
    # min_samples_split must be >= 2 in current scikit-learn (1 raises an error),
    # and "mse" was renamed to "squared_error" in scikit-learn 1.0
    clf = ExtraTreeRegressor(min_samples_leaf=param,
                             min_samples_split=2,
                             criterion='squared_error')
    clf.fit(train_X, train_Y)
    preds = clf.predict(test)
    return preds
def Build_MapMean_Model(self):
    MapMean = ExtraTreeRegressor()
    MapMean.fit(self.MapFeature_list, self.MapMean_list)
    self.Dump_Model('Model/MapMean.model', MapMean)
    print(MapMean.feature_importances_)
def doregress(X_train, y_train, n_train, X_test, y_test, n_test, band, fnames):
    lin = LinearRegression()
    lin.fit(X_train, y_train)
    lres = lin.predict(X_test) - y_test
    zl, ml, sl, fl = summstats(z_test, lres, n_test)

    #gbr1 = GradientBoostingRegressor(loss="ls")
    #gbr2 = GradientBoostingRegressor(loss="lad")
    #gbr1.fit(X_train, y_train)
    #gbr2.fit(X_train, y_train)
    #g1res = gbr1.predict(X_test) - y_test
    #g2res = gbr2.predict(X_test) - y_test
    #g1z, g1med, g1std = summstats(z_test, g1res)
    #g2z, g2med, g2std = summstats(z_test, g2res)

    #ada = AdaBoostRegressor()
    #ada.fit(X_train, y_train)
    #ares = ada.predict(X_test) - y_test
    #az, amed, astd = summstats(z_test, ares)

    # Some of these appear to be unstable, i.e. feature importance changes
    #for extension in ("A", "B", "C", "D", "E"):
    for extension in ("A",):
        print("# Regressing", extension)
        xtr = ExtraTreeRegressor()
        xtr.fit(X_train, y_train)
        zx, mx, sx, fx = doplot(xtr, X_test, y_test, z_test, n_test, fnames,
                                "%s-band ExtraTreeRegressor" % (band),
                                "R_%s_%s_ext.png" % (band, extension))
        xtrw = ExtraTreeRegressor()
        xtrw.fit(X_train, y_train, sample_weight=np.log10(n_train))
        zxw, mxw, sxw, fxw = doplot(xtrw, X_test, y_test, z_test, n_test, fnames,
                                    "%s-band weighted ExtraTreeRegressor" % (band),
                                    "R_%s_%s_ext_weight.png" % (band, extension))

        ####
        tree = DecisionTreeRegressor()
        tree.fit(X_train, y_train)
        zt, mt, st, ft = doplot(tree, X_test, y_test, z_test, n_test, fnames,
                                "%s-band DecisionTreeRegressor" % (band),
                                "R_%s_%s_tree.png" % (band, extension))
        treew = DecisionTreeRegressor()
        treew.fit(X_train, y_train, sample_weight=np.log10(n_train))
        ztw, mtw, stw, ftw = doplot(treew, X_test, y_test, z_test, n_test, fnames,
                                    "%s-band weighted DecisionTreeRegressor" % (band),
                                    "R_%s_%s_tree_weight.png" % (band, extension))

        ####
        weights = n_train
        nt = 50
        rfr = RandomForestRegressor(n_estimators=nt)
        rfr.fit(X_train, y_train)
        zr, mr, sr, fr = doplot(rfr, X_test, y_test, z_test, n_test, fnames,
                                "%s-band RandomForestRegressor" % (band),
                                "R_%s_%s_%d_rfr.png" % (band, extension, nt))
        rfrw = RandomForestRegressor(n_estimators=nt)
        rfrw.fit(X_train, y_train, sample_weight=weights)
        zrw, mrw, srw, frw = doplot(rfrw, X_test, y_test, z_test, n_test, fnames,
                                    "%s-band weighted RandomForestRegressor" % (band),
                                    "R_%s_%s_%d_rfr_weight.png" % (band, extension, nt))
        print("RF %d : %.5e +/- %.5e vs weighted %.5e +/- %.5e" % (
            nt,
            np.median(fr), 0.741 * (np.percentile(fr, 75) - np.percentile(fr, 25)),
            np.median(frw), 0.741 * (np.percentile(frw, 75) - np.percentile(frw, 25))))

        ####
        # Compare all models
        fig, (sp1, sp2, sp3) = plt.subplots(3, 1, sharex=True, figsize=(16, 12))
        sp1.plot(zl, ml, "r-", label="LinearRegression")
        sp1.plot(zt, mt, "b-", label="DecisionTreeRegressor")
        sp1.plot(zr, mr, "g-", label="RandomForestRegressor")
        sp1.plot(zx, mx, "m-", label="ExtraTreeRegressor")
        sp2.plot(zl[np.where(sl > 0.)], sl[np.where(sl > 0.)], "r-")
        sp2.plot(zt[np.where(st > 0.)], st[np.where(st > 0.)], "b-")
        sp2.plot(zr[np.where(sr > 0.)], sr[np.where(sr > 0.)], "g-")
        sp2.plot(zx[np.where(sx > 0.)], sx[np.where(sx > 0.)], "m-")
        ymin, ymax = sp2.get_ylim()
        sp2.set_ylim(max(1e-7, ymin), 1e-1)
        sp3.plot(zl[np.where(fl > 0.)], fl[np.where(fl > 0.)], "r-")
        sp3.plot(zt[np.where(ft > 0.)], ft[np.where(ft > 0.)], "b-")
        sp3.plot(zr[np.where(fr > 0.)], fr[np.where(fr > 0.)], "g-")
        sp3.plot(zx[np.where(fx > 0.)], fx[np.where(fx > 0.)], "m-")
        ymin, ymax = sp3.get_ylim()
        sp3.set_ylim(max(1e-7, ymin), 1.1)
        sp1.legend(loc=2, fancybox=True)
        sp1.set_title("Mean refraction residual (arcsec)", weight="bold")
        sp2.set_ylabel("RMS residual (arcsec)", weight="bold")
        sp3.set_ylabel("f_tot with dR>%.3f" % (dcrLevel), weight="bold")
        sp3.set_xlabel("Zenith distance (deg)", weight="bold")
        sp1.axhline(y=0, c='k', linestyle='--', alpha=0.5)
        sp2.axhline(y=dcrLevel, c='k', linestyle='--', alpha=0.5)
        sp3.axhline(y=0.01, c='k', linestyle='--', alpha=0.5)
        sp2.semilogy()
        sp3.semilogy()
        plt.savefig("R_%s_%s.png" % (band, extension))

        ###
        fig, (sp1, sp2, sp3) = plt.subplots(3, 1, sharex=True, figsize=(16, 12))
        sp1.plot(zl, ml, "r-", label="LinearRegression")
        sp1.plot(ztw, mtw, "b-", label="DecisionTreeRegressor weighted")
        sp1.plot(zrw, mrw, "g-", label="RandomForestRegressor weighted")
        sp1.plot(zxw, mxw, "m-", label="ExtraTreeRegressor weighted")
        sp2.plot(zl[np.where(sl > 0.)], sl[np.where(sl > 0.)], "r-")
        sp2.plot(ztw[np.where(stw > 0.)], stw[np.where(stw > 0.)], "b-")
        sp2.plot(zrw[np.where(srw > 0.)], srw[np.where(srw > 0.)], "g-")
        sp2.plot(zxw[np.where(sxw > 0.)], sxw[np.where(sxw > 0.)], "m-")
        ymin, ymax = sp2.get_ylim()
        sp2.set_ylim(max(1e-7, ymin), 1e-1)
        sp3.plot(zl[np.where(fl > 0.)], fl[np.where(fl > 0.)], "r-")
        sp3.plot(ztw[np.where(ftw > 0.)], ftw[np.where(ftw > 0.)], "b-")
        sp3.plot(zrw[np.where(frw > 0.)], frw[np.where(frw > 0.)], "g-")
        sp3.plot(zxw[np.where(fxw > 0.)], fxw[np.where(fxw > 0.)], "m-")
        ymin, ymax = sp3.get_ylim()
        sp3.set_ylim(max(1e-7, ymin), 1.1)
        sp1.legend(loc=2, fancybox=True)
        sp1.set_title("Mean refraction residual (arcsec)", weight="bold")
        sp2.set_ylabel("RMS residual (arcsec)", weight="bold")
        sp3.set_ylabel("f_tot with dR>%.3f" % (dcrLevel), weight="bold")
        sp3.set_xlabel("Zenith distance (deg)", weight="bold")
        sp1.axhline(y=0, c='k', linestyle='--', alpha=0.5)
        sp2.axhline(y=dcrLevel, c='k', linestyle='--', alpha=0.5)
        sp3.axhline(y=0.01, c='k', linestyle='--', alpha=0.5)
        sp2.semilogy()
        sp3.semilogy()
        plt.savefig("R_%s_%s_weight.png" % (band, extension))
random_state=42)

"""## Model Selection"""

from sklearn.ensemble import RandomForestRegressor

rfr = RandomForestRegressor()
rfr.fit(X_train, y_train)
r2_score(y_test, rfr.predict(X_test))
mean_squared_error(y_test, rfr.predict(X_test))

# fit the tree before inspecting its attributes (the original referenced
# forest.feature_importances_ before forest was defined)
forest = ExtraTreeRegressor()
forest.fit(X_train, y_train)
X_train.columns.shape
forest.feature_importances_.shape

importances = forest.feature_importances_
indices = np.argsort(importances)[::-1]

# Print the feature ranking
print("Feature ranking:")
for f in range(X_train.shape[1]):
    print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))

# Plot the impurity-based feature importances of the forest
plt.figure()
plt.title("Feature importances")
plt.bar(range(X_train.shape[1]), importances[indices])
et_m_Outputdata = et_MakingLT[['MakingLT']]

# Convert the data to vectors for model training
et_X1 = et_m_Inputdata.values
et_Y1 = et_m_Outputdata.values

# Split into training and test data
et_X1_train, et_X1_test, et_Y1_train, et_Y1_test = train_test_split(
    et_X1, et_Y1, test_size=0.33, random_state=42)

########################################################################################################################
# Build the ExtraTree model
making_extratree_model = ExtraTreeRegressor(max_depth=10, random_state=42)
making_extratree_model.fit(et_X1_train, et_Y1_train)

et_m_predicted = making_extratree_model.predict(et_X1_test)
et_m_predicted[et_m_predicted < 0] = 0

# Reshape the predictions from [1, n] to [n, 1]
et_length_x1test = len(et_X1_test)
et_m_predicted = et_m_predicted.reshape(et_length_x1test, 1)

# Evaluate model performance
et_m_mae = abs(et_m_predicted - et_Y1_test).mean(axis=0)
et_m_mape = (np.abs((et_m_predicted - et_Y1_test) / et_Y1_test).mean(axis=0))
et_m_rmse = np.sqrt(((et_m_predicted - et_Y1_test) ** 2).mean(axis=0))
et_m_rmsle = np.sqrt(
    (((np.log(et_m_predicted + 1) - np.log(et_Y1_test + 1)) ** 2).mean(axis=0)))
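# The hand-rolled MAE/MAPE/RMSE/RMSLE above have scikit-learn equivalents; a brief
# sketch assuming 1-D arrays (mean_absolute_percentage_error needs sklearn >= 0.24).
import numpy as np
from sklearn.metrics import (mean_absolute_error, mean_absolute_percentage_error,
                             mean_squared_error, mean_squared_log_error)

y_true = np.array([3.0, 5.0, 2.5])
y_pred = np.array([2.5, 5.0, 4.0])
print(mean_absolute_error(y_true, y_pred))              # MAE
print(mean_absolute_percentage_error(y_true, y_pred))   # MAPE
print(np.sqrt(mean_squared_error(y_true, y_pred)))      # RMSE
print(np.sqrt(mean_squared_log_error(y_true, y_pred)))  # RMSLE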
def getModel(x, y):
    et = ExtraTreeRegressor()
    et.fit(x, y)
    # joblib.dump(et, './model/et')  # save the model
    return et
res5 = forest_reg.score(X_test, y_test)
print('forest_reg: ', res5)

grad_reg = GradientBoostingRegressor(n_estimators=500)
grad_reg.fit(X_train, y_train)
res6 = grad_reg.score(X_test, y_test)
print('grad_reg: ', res6)

ada_reg = AdaBoostRegressor(n_estimators=200)
ada_reg.fit(X_train, y_train)
res7 = ada_reg.score(X_test, y_test)
print('ada_reg: ', res7)

decision_reg = DecisionTreeRegressor(random_state=333, min_samples_leaf=3, max_leaf_nodes=5)
decision_reg.fit(X_train, y_train)
res8 = decision_reg.score(X_test, y_test)
print('decision_reg: ', res8)

extraTree_reg = ExtraTreeRegressor(random_state=333, min_samples_leaf=3, max_leaf_nodes=5)
extraTree_reg.fit(X_train, y_train)
res9 = extraTree_reg.score(X_test, y_test)
print('extraTree_reg: ', res9)
def Build_MapMean_Model(self):
    MapMean_Model = ExtraTreeRegressor()
    MapMean_Model.fit(self.MapFeature_list, self.MapMean_list)
    self.Dump_Model('Model/MapMean.model', MapMean_Model)
from sklearn.ensemble import RandomForestRegressor
rf_model = RandomForestRegressor(n_estimators=700, random_state=42)
rf_model.fit(x_train, y_train)
y_predict = rf_model.predict(x_test)
r2_score(y_test, y_predict.ravel())

# ### ExtraTreeRegressor

# In[85]:

from sklearn.tree import ExtraTreeRegressor
extratree_model = ExtraTreeRegressor(random_state=42)
extratree_model.fit(x_train, y_train)
y_predict = extratree_model.predict(x_test)
r2_score(y_test, y_predict.ravel())

# ### Result
#
# From these results we can conclude that, of the models tried, the
# RandomForestRegressor works best, with 90.66% accuracy, which is very good.

# In[86]:

# Save the best model (the random forest, per the result above) with pickle
# so that we can reuse it later
import pickle
pickle.dump(rf_model, open('model.pkl', 'wb'))
model = pickle.load(open('model.pkl', 'rb'))
def ExtraTreesModel(self, train_x, train_y):
    print('begin training ExtraTrees')
    model = ExtraTreeRegressor()
    model.fit(train_x, train_y)
    return model
from itertools import combinations
from math import inf

from sklearn.metrics import mean_squared_error

n = X.shape[1]
int_scores = {}
ext_scores = {}
for i in range(1, n + 1):
    int_score_tmp1 = inf
    ext_score_tmp1 = inf
    for features in combinations(range(n), i):
        X_cuted = X[:, features]
        int_score_tmp2 = inf
        ext_score_tmp2 = inf
        for train_index, test_index in cv.split(X_cuted):
            X_train, X_test = X_cuted[train_index], X_cuted[test_index]
            y_train, y_test = y[train_index], y[test_index]
            alg.fit(X_train, y_train)

            y_pred = alg.predict(X_train)
            error = mean_squared_error(y_train, y_pred)
            int_score_tmp2 = min(int_score_tmp2, error)

            y_pred = alg.predict(X_test)
            error = mean_squared_error(y_test, y_pred)
            ext_score_tmp2 = min(ext_score_tmp2, error)

        int_score_tmp1 = min(int_score_tmp1, int_score_tmp2)
        ext_score_tmp1 = min(ext_score_tmp1, ext_score_tmp2)

    int_scores[i] = int_score_tmp1
    ext_scores[i] = ext_score_tmp1
print(int_scores, ext_scores)
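# The per-fold bookkeeping above can be compressed with cross_validate; a sketch
# assuming `alg` is a scikit-learn regressor and `cv` a splitter, as above.
from sklearn.model_selection import cross_validate

res = cross_validate(alg, X_cuted, y, cv=cv,
                     scoring='neg_mean_squared_error', return_train_score=True)
int_best = -res['train_score'].max()  # lowest train MSE across folds
ext_best = -res['test_score'].max()   # lowest test MSE across folds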
def getExtraTreeModel(x, y):
    et = ExtraTreeRegressor()
    et.fit(x, y)
    return et
X1 = cols[0:11]
#X1 = preprocessing.normalize(X1)
X = list(zip(*X1))
Y = cols[11]

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=rn)
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)
#print(y_test)

lin_reg_mod = ExtraTreeRegressor()
lin_reg_mod.fit(X_train, y_train)
pred = lin_reg_mod.predict(X_test)
#print(pred)
#print(y_test)

test_set_r2 = r2_score(y_test, pred)
#print(test_set_r2)
tr2 += test_set_r2
#abs_er = mean_absolute_error(y_test, pred)
#tabse += abs_er

# per-sample relative error
temp = []
for (i, j) in zip(y_test, pred):
    t = (abs(i - j)) / float(i)
    temp.append(t)
#print(temp)
from sklearn.tree import DecisionTreeRegressor
# Define model. Specify a number for random_state to ensure same results each run
dt = DecisionTreeRegressor(random_state=1)
# Fit model
dt.fit(X_train, y_train)

dt_prediction = dt.predict(X_test)
# Regressors output continuous values; round to 0/1 before accuracy_score,
# which otherwise raises an error on continuous predictions
dt_score = accuracy_score(y_test, dt_prediction.round())
print(dt_score)

from sklearn.tree import ExtraTreeRegressor
# Define model. Specify a number for random_state to ensure same results each run
etr = ExtraTreeRegressor(random_state=1)
# Fit model
etr.fit(X_train, y_train)

etr_prediction = etr.predict(X_test)
etr_score = accuracy_score(y_test, etr_prediction.round())
print(etr_score)

X_train = df_train.drop("Survived", axis=1)
y_train = df_train["Survived"]
X_train = X_train.drop("PassengerId", axis=1)
X_test = df_test.drop("PassengerId", axis=1)

xgboost = xgb.XGBClassifier(max_depth=3, n_estimators=300, learning_rate=0.05).fit(X_train, y_train)
Y_pred = xgboost.predict(X_test)

submission = pd.DataFrame({
    "PassengerId": df_test["PassengerId"],
    "Survived": Y_pred
})
submission.to_csv('submission.csv', index=False)
def extra_tree_regressor(self):
    x_train, x_test, y_train, y_test = self.preprocessing()
    model = ExtraTreeRegressor()
    y_pred = model.fit(x_train, y_train).predict(x_test)
    self.printing(y_test, y_pred, 'Extra Tree')
import matplotlib.pyplot as plt
import seaborn as sb

df = pd.read_csv('Data/Real-Data/Real_combine.csv')
df = df.dropna()
sb.pairplot(df)
df.corr()

X = df.iloc[:, :-1]
y = df.iloc[:, -1]

from sklearn.tree import ExtraTreeRegressor
et = ExtraTreeRegressor()
et.fit(X, y)
print(et.feature_importances_)
feat_imp = pd.Series(et.feature_importances_, index=X.columns)
feat_imp.nlargest(5).plot(kind='barh')
plt.show()

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)

from keras.models import Sequential
from keras.layers import Dense

regressor = Sequential()
# adding input layer and first hidden layer
regressor.add(Dense(units=128, kernel_initializer='normal',
                    input_dim=X_train.shape[1], activation='relu'))