def xgboostmodel(self):
    """Train an XGBoost regressor on ``datafile`` and record hold-out results.

    The CSV named by the module-level ``datafile`` is loaded with its first
    column as the index. Every column except the last is used as a feature
    and the last column is the regression target. 70% of the rows are used
    for training and the remainder for evaluation; no ``random_state`` is
    passed to ``train_test_split``.

    Hyperparameters come from ``self.params`` when it is set. When it is
    ``None`` the values that were previously hard-coded in the constructor
    call (``max_depth=128``, ``n_estimators=768``) are used, so the default
    behaviour is unchanged. Previously ``self.params`` was read but never
    passed to the model.

    Side effects:
        * prints the shape of the loaded frame;
        * saves the fitted model to ``self.model_file``;
        * stores the hold-out targets/predictions on ``self.true`` /
          ``self.pred``;
        * calls ``self.show_save_figure`` and ``self.save_result``.
    """
    df = pd.read_csv(datafile, encoding='utf-8', index_col=0)
    print(df.shape)

    traindata = df.values
    x = traindata[:, :-1]
    y = traindata[:, -1]
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, train_size=0.7)

    if self.params is None:
        params = {'max_depth': 128, 'n_estimators': 768}
    else:
        params = self.params

    # Merge through a dict so that a key supplied in ``params`` (for example
    # ``learning_rate``) overrides the fixed setting instead of raising a
    # duplicate-keyword TypeError.
    # NOTE(review): 'silence' is not a documented XGBRegressor parameter (the
    # historical flag was 'silent', later superseded by 'verbosity'). It is
    # kept unchanged here to avoid altering behaviour -- confirm the intent.
    model_kwargs = {'learning_rate': 0.01, 'silence': False, **params}
    raw_model = XGBRegressor(**model_kwargs)
    raw_model.fit(x_train, y_train)
    raw_model.save_model(self.model_file)

    pred = raw_model.predict(x_test)
    self.true = y_test
    self.pred = pred

    self.show_save_figure(fig_path=self.fig_path,
                          modelname=self.job_name,
                          detal_idx=500)
    t_mean = self.cal_mean(self.true)
    p_mean = self.cal_mean(self.pred)
    self.save_result(self.result_path, true_mean=t_mean, pred_mean=p_mean)
# Save whatever figure is currently active (presumably the feature-importance
# plot drawn earlier in the script) and release it.
plt.savefig('C:/Users/leech/OneDrive/Desktop/Analytics RossMann/rossmann-store-sales/image/feature_importance.png')
plt.close()

# Score the held-out rows with the trained model.
predictions = clf.predict(test_x)

# Root-mean-squared error between the actual and predicted values.
mse = mean_squared_error(test_y.values, predictions)
rms = math.sqrt(mse)
print(rms)

# Put predictions next to the actual values, keyed by store and date, so the
# discrepancies can be inspected row by row.
combinedResult = pd.DataFrame(
    data={
        'Store': testSet['Store'].values.ravel(),
        'Date': testSet['Date'].values.ravel(),
        'predictions': predictions,
        'actual': test_y.values.ravel(),
    }
)
print(combinedResult)

# Keep only the rows that belong to store 1.
is_store_1 = combinedResult['Store'] == 1
combinedResult1 = combinedResult.loc[is_store_1]

# Draw both series on the current axes so they share one chart.
ax = plt.gca()
combinedResult1.plot.line(x='Date', y='actual', ax=ax)
combinedResult1.plot.line(x='Date', y='predictions', color='red', ax=ax)

# Write the store-1 chart to disk before showing it, then release the figure.
plt.savefig('C:/Users/leech/OneDrive/Desktop/Analytics RossMann/rossmann-store-sales/image/store1.png')
plt.show()
plt.close()

# Persist the trained model.
clf.save_model('C:/Users/leech/OneDrive/Desktop/Analytics RossMann/rossmann-store-sales/model/finalmodel.model')