from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.tree import ExtraTreeRegressor

# Rescale the features with the already-constructed `scaler` and wrap the
# result in a DataFrame that carries the original column labels.
# NOTE(review): the scaler is fitted on ALL rows before the split below, so
# test rows influence the scaling parameters - confirm this is acceptable.
scaled_X = scaler.fit_transform(X)
new_X = pd.DataFrame(scaled_X, columns=X.columns)
new_X.head()  # preview only; the return value is displayed in a notebook cell

X_train, X_test, y_train, y_test = train_test_split(
    new_X, y, test_size=0.33, random_state=42
)

# Fit the tree once, on the training split only. A held-out score is only
# meaningful when the model has never seen the test rows; refitting on
# X_test would turn the "test" number into another training score.
model = ExtraTreeRegressor()
model.fit(X_train, y_train)

# R^2 on the rows the model was trained on.
print(model.score(X_train, y_train))

# R^2 on the held-out rows.
print(model.score(X_test, y_test))

# Importances of the train-fitted tree; plot the five largest.
print(model.feature_importances_)
imp_feat = pd.Series(model.feature_importances_, index=X.columns)
imp_feat.nlargest(5).plot(kind='barh')
plt.show()

# Linear model fitted on the same training split.
lm = LinearRegression()
lm.fit(X_train, y_train)
from math import *  # noqa: F401,F403 - unused here; kept in case other code relies on it
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.tree import ExtraTreeRegressor

# Load the data set and pick the two predictor columns and the target column.
data = pd.read_csv('ice.csv')
x = data[['temp', 'street']]
y = data['ice']

# Fit a single randomized regression tree and predict on the same rows.
clf = ExtraTreeRegressor()
clf.fit(x, y)
p = clf.predict(x)

# R^2 on the training rows themselves (not a held-out estimate).
# Function-call form so the script also runs on Python 3.
print(clf.score(x, y))

# One x-position per row, derived from the data instead of a hard-coded 31,
# so the plot works for any number of rows in the CSV.
t = np.arange(len(data), dtype=float)

# Dashed: observed 'ice' values; solid: the tree's predictions.
plt.plot(t, data['ice'], '--', t, p, '-')
plt.show()
# Each regressor below is fitted exactly once on the training split and then
# scored (R^2) on the held-out split. Fitting twice in a row with the same
# arguments only repeats the work: the second fit replaces the first.

res5 = forest_reg.score(X_test, y_test)
print('forest_reg: ', res5)

grad_reg = GradientBoostingRegressor(n_estimators=500)
grad_reg.fit(X_train, y_train)
res6 = grad_reg.score(X_test, y_test)
print('grad_reg: ', res6)

ada_reg = AdaBoostRegressor(n_estimators=200)
ada_reg.fit(X_train, y_train)
res7 = ada_reg.score(X_test, y_test)
print('ada_reg: ', res7)

# The two single-tree models share the same seed and size limits so their
# scores are directly comparable.
decision_reg = DecisionTreeRegressor(random_state=333, min_samples_leaf=3, max_leaf_nodes=5)
decision_reg.fit(X_train, y_train)
res8 = decision_reg.score(X_test, y_test)
print('decision_reg: ', res8)

extraTree_reg = ExtraTreeRegressor(random_state=333, min_samples_leaf=3, max_leaf_nodes=5)
extraTree_reg.fit(X_train, y_train)
res9 = extraTree_reg.score(X_test, y_test)
print('extraTree_reg: ', res9)
# In[ ]:

from sklearn.tree import ExtraTreeRegressor

# A single randomized regression tree with default hyper-parameters.
dtr = ExtraTreeRegressor()

# In[ ]:

# Train the tree on the training split.
dtr.fit(X=X_train, y=y_train)

# In[ ]:

# R^2 of the fitted tree on the same rows it was trained on.
dtr.score(X=X_train, y=y_train)

# -----------------
# <a id="8"></a> <br>
# ## 8- Conclusion
# This kernel is not complete yet. I will try to cover all the parts of the ML process with a variety of Python packages. I know there are still some problems, so I hope to get your feedback to improve it.
# You can follow me on:
# <br>
# > ###### [ GitHub](https://github.com/mjbahmani)
# <br>
# --------------------------------------
#
# **I hope you find this kernel helpful and some <font color="red"><b>UPVOTES</b></font> would be very much appreciated**
# <a id="9"></a> <br>
from math import *  # noqa: F401,F403 - unused here; kept in case other code relies on it
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.tree import ExtraTreeRegressor

# Read the CSV and separate the predictor columns from the target column.
data = pd.read_csv('ice.csv')
x = data[['temp', 'street']]
y = data['ice']

# Train one randomized regression tree, then predict the rows it was fitted on.
clf = ExtraTreeRegressor()
clf.fit(x, y)
p = clf.predict(x)

# Training-set R^2. Written as a function call so it is valid on Python 3.
print(clf.score(x, y))

# The x-axis has one position per row; sizing it from the data (rather than
# a fixed 31) keeps its length aligned with the plotted series.
t = np.arange(len(data), dtype=float)

# Observed 'ice' values as a dashed line, predictions as a solid line.
plt.plot(t, data['ice'], '--', t, p, '-')
plt.show()
#print("Best score on train set:{:.2f}".format(svr.best_score_))
#y_pred = svr.predict(X_test)

# Hyper-parameter search over an ExtraTreeRegressor. Despite the `gbm`
# variable name, no gradient-boosting model is involved in this section.
gbm = ExtraTreeRegressor()
gbm = GridSearchCV(
    gbm,
    param_grid={
        "min_samples_leaf": [1, 4, 8, 16, 32],
        'min_samples_split': [4, 10, 20, 100],
        'max_depth': [2, 8, 16, 32],
    },
    cv=6,
)
gbm.fit(X_train, y_train)
y_pred = gbm.predict(X_test)

# eval
print("MSE:", metrics.mean_squared_error(y_test, y_pred))
print("Test set score:{:.2f}".format(gbm.score(X_test, y_test)))
#print("AUC Score (Train): %f" % metrics.roc_auc_score(y_test, y_pred))

# Measured-vs-predicted scatter. The dashed line is the ideal y = x diagonal,
# so both of its endpoints must use the measured range; using the predicted
# range for the y-coordinates would tilt the line away from the identity and
# make the fit look better or worse than it is.
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)
ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()

# Persist the fitted search object (it wraps the best estimator).
# NOTE: only unpickle this file from a trusted source; pickle can execute code.
with open('model.pickle', 'wb') as fw:
    pickle.dump(gbm, fw)

#with open('model.pickle', 'rb') as fr: