def fit(self, x, y):
    """Fit the selection and transformation steps used by the 'poly' method.

    When ``self.method`` is ``'poly'`` three objects are fitted in sequence:
    a ``Filter_Selection('pearson')`` limited to ``self.TopN`` on ``(x, y)``,
    a ``Data_Change('poly')`` on the selector's output, and a
    ``Data_Change('avgstd')`` on the output of the 'poly' transform. The
    fitted objects are stored on the instance as ``Filter_Selection``,
    ``DChange`` and ``Standard``. For any other method nothing happens.

    Args:
        x: Feature data handed to the selector.
        y: Target data handed to the selector.
    """
    if self.method != 'poly':
        return

    # Step 1: Pearson-based filter selection.
    selector = Filter_Selection('pearson', TopN=self.TopN)
    selector.fit(x, y)
    selected_x = selector.transform(x)

    # Step 2: 'poly' data change fitted on the selector output.
    poly_change = Data_Preprocess.Data_Change('poly')
    poly_change.fit(selected_x)
    poly_x = poly_change.transform(selected_x)

    # Step 3: 'avgstd' data change fitted on the 'poly' output.
    scaler = Data_Preprocess.Data_Change('avgstd')
    scaler.fit(poly_x)

    # Publish the fitted steps only after every fit has succeeded.
    self.Filter_Selection = selector
    self.DChange = poly_change
    self.Standard = scaler
def get_vip(self, isplot=True):
    """Compute normalised variable importance for the fitted classifier.

    Args:
        isplot: When true, draw a bar chart of the result via
            ``Data_plot.plot_bar_analysis`` and show it.

    Returns:
        ``None`` when ``self.method`` is one of 'knn', 'dt', 'svm' or 'bp'
        (no importance is computed for those). Otherwise the output of the
        'minmax' ``Data_Change`` applied to a one-column frame named
        'variable importance', indexed by ``self.factor_name`` and sorted
        ascending by importance before scaling.

    Raises:
        ValueError: If ``self.method`` is not one of the handled methods.
            Previously this surfaced as an UnboundLocalError.
    """
    col_name = 'variable importance'

    if self.method in ('knn', 'dt', 'svm', 'bp'):
        return None

    if self.method == 'logistic':
        # Average the absolute coefficients over the rows of coef_, giving
        # one value per coefficient column.
        mean_coef = pd.DataFrame(abs(self.cls_model.coef_)).T.mean(axis=1)
        var_importance = pd.DataFrame(
            mean_coef.values, index=self.factor_name, columns=[col_name]
        )
    elif self.method in ('rf', 'adaBoost', 'gbm', 'xgb'):
        # These models are read through best_estimator_ of the fitted search.
        coef = self.cls_model.best_estimator_.feature_importances_.reshape(-1, 1)
        var_importance = pd.DataFrame(
            abs(coef), columns=[col_name], index=self.factor_name
        )
    else:
        raise ValueError(
            f"get_vip does not support method {self.method!r}"
        )

    res = var_importance.sort_values(col_name)

    # Normalise the importances with the project's 'minmax' transform.
    Dchange = Data_Preprocess.Data_Change('minmax')
    Dchange.fit(res)
    res = Dchange.transform(res)

    # Draw the bar chart.
    if isplot:
        bar_plot = Data_plot.plot_bar_analysis(res)
        bar_plot.title('variable importance')
        bar_plot.show()
    return res
def get_vip(self, isplot=True):
    """Compute normalised variable importance for the fitted regressor.

    Args:
        isplot: When true, draw a bar chart of the result via
            ``Data_plot.plot_bar_analysis`` (called with ``Top=15``) and
            show it.

    Returns:
        ``None`` when ``self.method`` is one of 'svr', 'knn', 'dt' or 'bp'
        (these algorithms offer no way to measure factor importance here).
        Otherwise the output of the 'minmax' ``Data_Change`` applied to a
        one-column frame named 'variable importance', indexed by
        ``self.factor_name`` and sorted descending by importance before
        scaling.

    Raises:
        ValueError: If ``self.method`` is not one of the handled methods.
            Previously this surfaced as an UnboundLocalError.
    """
    if self.method in ('svr', 'knn', 'dt', 'bp'):
        # No importance measure is available for these algorithms.
        return None

    col_name = 'variable importance'

    if self.method == 'linear':
        # Reshape to a single column like the other branches, so that both a
        # flat coef_ and a (1, n) coef_ line up with the factor index.
        coef = abs(self.reg_model.coef_).reshape(-1, 1)
    elif self.method in ('ridge', 'lasso', 'ElasticNet', 'pls'):
        coef = abs(self.reg_model.best_estimator_.coef_.reshape(-1, 1))
    elif self.method in ('rf', 'adaBoost', 'gbm', 'xgb'):
        coef = abs(
            self.reg_model.best_estimator_.feature_importances_.reshape(-1, 1)
        )
    else:
        raise ValueError(
            f"get_vip does not support method {self.method!r}"
        )

    var_importance = pd.DataFrame(
        coef, columns=[col_name], index=self.factor_name
    )
    res = var_importance.sort_values(col_name, ascending=False)

    # Normalise the importances with the project's 'minmax' transform.
    Dchange = Data_Preprocess.Data_Change('minmax')
    Dchange.fit(res)
    res = Dchange.transform(res)

    # Draw the bar chart.
    if isplot:
        bar_plot = Data_plot.plot_bar_analysis(res, Top=15)
        bar_plot.title('variable importance')
        bar_plot.show()
    return res
def data_change(self, method='minmax'):
    """Transform ``self.x`` with a ``Data_Change`` step and record the step.

    A ``Data_Preprocess.Data_Change`` built with ``method`` is fitted on
    ``self.x``; ``self.x`` is replaced by its transformed version and the
    matching columns of ``self.data`` are overwritten with it. The fitted
    step is appended to ``self.Pipeline_list`` under the name
    'data_change' and also kept as ``self.data_change_model``.

    Args:
        method: Name forwarded to ``Data_Change`` (default 'minmax').
    """
    transformer = Data_Preprocess.Data_Change(method=method)
    transformer.fit(self.x)

    transformed = transformer.transform(self.x)
    self.x = transformed
    # Mirror the transformed columns back into the full data table.
    self.data.loc[:, transformed.columns] = transformed

    # Register the new step in the processing list.
    self.Pipeline_list.append(('data_change', transformer))
    self.data_change_model = transformer
def predict(self, x):
    """Predict by fusing every trained sub-model and then every model.

    For each name in ``self.listModelName`` the predictions of all
    sub-models in ``self.train_model[name]`` are averaged. The per-model
    results are then combined according to ``self.stack_method``:

    * ``'avg'``: plain mean across models.
    * ``'weight'``: dot product with weights obtained by passing
      ``self.mse_list`` through the project's 'minmax' ``Data_Change``.

    Args:
        x: Input passed unchanged to each sub-model's ``predict``.

    Returns:
        A 1-D numpy array with one fused prediction per row.

    Raises:
        ValueError: If ``self.stack_method`` is neither 'avg' nor 'weight'.
    """
    if self.stack_method not in ('avg', 'weight'):
        raise ValueError(
            f"predict does not support stack_method {self.stack_method!r}"
        )

    per_model = []
    for model_name in self.listModelName:
        sub_model_res = [
            pd.DataFrame(sub_model.predict(x))
            for sub_model in self.train_model[model_name]
        ]
        # Fuse the sub-models of one model by averaging them.
        per_model.append(pd.concat(sub_model_res, axis=1).mean(axis=1))

    # One column per model.
    stacked = pd.concat(per_model, axis=1)

    if self.stack_method == 'avg':
        return stacked.mean(axis=1).values

    # 'weight': derive the weights by normalising the MSE values.
    # NOTE(review): if 'minmax' maps the smallest MSE to 0 and the largest
    # to 1, the worst model gets the largest weight and the weights do not
    # sum to 1 -- confirm this is the intended weighting.
    mse = pd.DataFrame(self.mse_list)
    Dchange = Data_Preprocess.Data_Change('minmax')
    mse = Dchange.fit_transform(mse)
    # One weight per model (column); the sample count must not be used here.
    weight = np.array(mse).reshape(-1, 1)
    # np.dot already returns an ndarray; flatten to match the 'avg' branch.
    return np.dot(stacked.values, weight).ravel()