def f_getmodelen(model_path):
    '''
    Return the number of input variables of the model stored at model_path.

    The object returned by ModelUtil.load_bstmodel is probed with a list of
    accessors, in order; the first one that succeeds supplies the answer:

    1. len(model.feature_names)   -- xgboost
    2. model.num_features()       -- xgboost whose feature_names is not sized
                                     (assumed to be None there; TODO confirm)
    3. model[0].n_features_       -- random forest
    4. model[0].n_features_in_    -- random forest on scikit-learn releases
                                     that no longer expose n_features_
    5. len(model.feature_name())  -- gbm
    6. len(model.params) - 1      -- scorecard (the "- 1" presumably drops
                                     the intercept; verify against the
                                     scorecard trainer)

    The last accessor is not guarded: if no accessor matches, its exception
    propagates to the caller.

    :param model_path: path of the saved model
    :return: number of variables the model expects as input
    '''
    model = ModelUtil.load_bstmodel(model_path)

    # The loaded model is never rebound, so every probe inspects the model
    # itself rather than a half-extracted attribute of it.
    probes = (
        lambda m: len(m.feature_names),
        lambda m: m.num_features(),
        lambda m: m[0].n_features_,
        lambda m: m[0].n_features_in_,
        lambda m: len(m.feature_name()),
    )
    for probe in probes:
        try:
            return probe(model)
        except Exception:
            # Wrong model type for this accessor; try the next one.
            # KeyboardInterrupt / SystemExit are deliberately not caught.
            continue

    # Scorecard fallback; failures here are real errors and are not hidden.
    return len(model.params) - 1
def f_getVarpctrf(model_path, implen):
    '''
    Check whether implen equals the input-variable count of a random-forest
    model.

    The count is read from the object returned by ModelUtil.load_bstmodel:
    first from n_features_, then from n_features_in_ (the attribute newer
    scikit-learn releases expose instead). If neither can be read the count
    becomes np.nan, which compares unequal to any number.

    :param model_path: path of the saved model
    :param implen: length of the important-variable list
    :return: result of implen == model variable count
    '''
    model = ModelUtil.load_bstmodel(model_path)

    modellen = np.nan
    for attr in ('n_features_', 'n_features_in_'):
        try:
            modellen = getattr(model, attr)
        except Exception:
            # Attribute missing on this model / library version; try the
            # next spelling. KeyboardInterrupt / SystemExit still propagate.
            continue
        break
    return implen == modellen
def f_getVarpctgbm(model_path, implen):
    '''
    Check whether implen equals the input-variable count of a gbm model.

    The count is len(model.feature_name()) on the object returned by
    ModelUtil.load_bstmodel. If that lookup fails the count becomes np.nan,
    which compares unequal to any number.

    :param model_path: path of the saved model
    :param implen: length of the important-variable list
    :return: result of implen == model variable count
    '''
    model = ModelUtil.load_bstmodel(model_path)
    try:
        # gbm way of listing the variables fed into the model
        modellen = len(model.feature_name())
    except Exception:
        # Best-effort fallback is kept, but KeyboardInterrupt / SystemExit
        # are no longer swallowed as they were by the bare except.
        modellen = np.nan
    return implen == modellen
def f_getVarpctboost(model_path, implen):
    '''
    Check whether implen equals the input-variable count of an xgboost model.

    The count is len(model.feature_names) on the object returned by
    ModelUtil.load_bstmodel. When feature_names is None, model.num_features()
    is used instead (assumes the loaded object is an xgboost Booster that
    provides it; TODO confirm for the xgboost version in use). If the lookup
    fails the count becomes np.nan, which compares unequal to any number.

    :param model_path: path of the saved model
    :param implen: length of the important-variable list
    :return: result of implen == model variable count
    '''
    model = ModelUtil.load_bstmodel(model_path)
    try:
        # xgboost way of listing the variables fed into the model
        names = model.feature_names
        if names is not None:
            modellen = len(names)
        else:
            # No stored names: ask the model for its feature count directly.
            modellen = model.num_features()
    except Exception:
        # Best-effort fallback is kept, but KeyboardInterrupt / SystemExit
        # are no longer swallowed as they were by the bare except.
        modellen = np.nan
    return implen == modellen
def __init__(self, modelname, dummyList, Allcol, bstmodelpath):
    # Store the constructor arguments on the instance.
    # dummyList: per the original note, this value is also obtained elsewhere.
    # Allcol: per the original note, can be produced by f_genAllcol(dummyAfterdf).
    self.modelname, self.dummyList = modelname, dummyList
    self.Allcol, self.bstmodelpath = Allcol, bstmodelpath

    # Load the model last, after every plain attribute has been stored.
    loaded_model = ModelUtil.load_bstmodel(bstmodelpath)
    self.bstmodel = loaded_model
for x in ls: fill_dict[x] = 0 # print(fill_dict) var = dummy2df.fillna(fill_dict) return var # dummyList = list(set(res[4]) - set(res[5])) # dummyAfterdf = f_dummyOld(tr, dummyList) # f_dummyNew(te.head(3), dummyList, f_genAllcol(dummyAfterdf)) if __name__ == '__main__': # 测试一下 保存模型处理数据的类 modelpath = r'C:\Users\liyin\Desktop\CcxMLOGE\TestUnit\ccxboost\model20171211190055\modeltxt\model_ccxboost_2017-12-11.txt' bst = ModelUtil.load_bstmodel(modelpath) Allcol = bst.feature_names import numpy as np dd = pd.DataFrame(np.ones(len(Allcol))).T dd.columns = Allcol dd.index = ['All'] dummyList = list(set([i.split('_')[0] for i in Allcol if '_' in i])) psd = processData('ccxboost', dummyList, dd, modelpath) psd.modelname psd.save('111', r'C:\Users\liyin\Desktop\CcxMLOGE\TestUnit') path = 'C:/Users/liyin/Desktop/CcxMLOGE/TestUnit/predict/ccxboost111.model' with open(path, 'rb') as f: psd_1 = pickle.load(f)