示例#1
0
def f_getAucKs(pretrpath):
    """
    Compute the AUC and KS statistics for a saved prediction file.

    :param pretrpath: path of the CSV holding the predictions of the best model.
        The file must contain a ``P_value`` column; its second column
        (position 1) is passed as the other argument to the metric helpers.
    :return: tuple ``(auc, ks)`` as produced by ``ModelUtil.AUC`` and
        ``ModelUtil.ks``
    """
    # Expected layout per the original note: index_name, target_name, P_value.
    frame = pd.read_csv(pretrpath)
    label_pos = 1

    ks_value = ModelUtil.ks(frame["P_value"], frame.iloc[:, label_pos])
    auc_value = ModelUtil.AUC(frame["P_value"], frame.iloc[:, label_pos])
    return auc_value, ks_value
示例#2
0
def f_getmodelen(model_path):
    """
    Return the number of input variables of the model saved at *model_path*.

    The loaded object can be one of several model types, so each known way of
    reading the variable count is tried in order and the first one that works
    is returned.

    :param model_path: path of the saved model, loaded with
        ``ModelUtil.load_bstmodel``
    :return: number of variables the model takes as input
    :raises Exception: whatever the final (scorecard) accessor raises when no
        accessor fits the loaded object
    """
    model = ModelUtil.load_bstmodel(model_path)

    # Every accessor receives the loaded model itself. The model is never
    # rebound to an intermediate value, so a failed attempt cannot corrupt
    # the attempts that follow it.
    accessors = (
        lambda m: len(m.feature_names),    # xgboost
        lambda m: m[0].n_features_,        # random forest
        lambda m: m[0].n_features_in_,     # random forest, newer scikit-learn
        lambda m: len(m.feature_name()),   # gbm
    )
    for accessor in accessors:
        try:
            return accessor(model)
        except Exception:
            # Wrong model type for this accessor; try the next one.
            # KeyboardInterrupt / SystemExit are deliberately not swallowed.
            continue

    # Scorecard: one entry of ``params`` is not counted as a variable
    # (presumably the intercept -- confirm against the scorecard trainer).
    return len(model.params) - 1
示例#3
0
def f_recursionrfModel(train_path, test_path, index_name, target_name, modelconf, i, optimizationType):
    """
    Train the random-forest model repeatedly, feeding each round's important
    variables back in as the next round's inputs, so that the variable set is
    filtered down round by round.

    :param train_path: training set, handed to ``ModelUtil.load_data``
    :param test_path: test set, handed to ``ModelUtil.load_data``
    :param index_name: name of the index column (always kept)
    :param target_name: name of the target column (always kept)
    :param modelconf: path of the model configuration file
    :param i: number of rounds already performed
    :param optimizationType: forwarded unchanged to ``ModelMain.ccxrf_main``
    :return: the list of output paths produced by the last training round
    """
    train_data = ModelUtil.load_data(train_path)
    test_data = ModelUtil.load_data(test_path)
    trainer = ModelMain(train_data, test_data, index_name, target_name)
    result_paths = trainer.ccxrf_main(modelconf, optimizationType)

    # Important variables of this round (count and names).
    implen, impvar = f_getImplen(result_paths[2])
    # AUC / KS read from the two prediction files of this round.
    train_auc, train_ks = f_getAucKs(result_paths[3])
    test_auc, test_ks = f_getAucKs(result_paths[4])
    # Whether the model's input variables are exactly the important variables.
    imppct = f_getVarpctrf(result_paths[1], implen)
    keep_going = f_flag(train_auc, train_ks, test_auc, test_ks, imppct)

    rounds_done = i + 1

    # Hard stop once the round counter reaches 5.
    if rounds_done >= 5:
        print('递归次数达到要求结束递归' * 10)
        return result_paths

    # A falsy flag means no further round is needed.
    if not keep_going:
        print('满足条件结束递归 ' * 10)
        return result_paths

    print('递归调用 ' * 20)
    selected_cols = impvar + [index_name, target_name]
    print('---入选模型的变量个数%s' % len(selected_cols))
    next_train = ModelUtil.load_data(train_data)[selected_cols]
    next_test = ModelUtil.load_data(test_data)[selected_cols]
    print('##' * 20, rounds_done, '##' * 20)
    # Possible later improvement (from the original note): also adjust
    # modelconf between rounds.
    return f_recursionrfModel(next_train, next_test, index_name, target_name, modelconf, rounds_done,
                              optimizationType)
示例#4
0
def f_splitdata(dummyAfterdf, target_name):
    """
    Split a data set into a training part and a test part.

    :param dummyAfterdf: data frame to split; every column except
        ``target_name`` is treated as a feature column
    :param target_name: name of the target column
    :return: tuple ``(train, test)`` as produced by ``ModelUtil.splitdata``
    """
    feature_cols = []
    for col in dummyAfterdf.columns:
        if col in (target_name,):
            continue
        feature_cols.append(col)

    train_part, test_part = ModelUtil.splitdata(dummyAfterdf, feature_cols, target_name)
    return train_part, test_part
示例#5
0
def f_getVarpctrf(model_path, implen):
    """
    Tell whether the number of important variables equals the number of
    input variables of the saved random-forest model.

    :param model_path: path of the saved model, loaded with
        ``ModelUtil.load_bstmodel``
    :param implen: number of important variables
    :return: result of ``implen == <model input-variable count>``; ``False``
        when the count cannot be read from the loaded object
    """
    model = ModelUtil.load_bstmodel(model_path)

    # Random forest: ``n_features_`` is tried first; ``n_features_in_`` is the
    # name used by newer scikit-learn releases.
    modellen = getattr(model, 'n_features_', None)
    if modellen is None:
        modellen = getattr(model, 'n_features_in_', None)

    if modellen is None:
        # Not a model this accessor understands: report "not equal" explicitly
        # instead of relying on a comparison against NaN.
        return False
    return implen == modellen
示例#6
0
def f_getVarpctgbm(model_path, implen):
    """
    Tell whether the number of important variables equals the number of
    input variables of the saved gbm model.

    :param model_path: path of the saved model, loaded with
        ``ModelUtil.load_bstmodel``
    :param implen: number of important variables
    :return: result of ``implen == <model input-variable count>``; ``False``
        when the count cannot be read from the loaded object
    """
    model = ModelUtil.load_bstmodel(model_path)
    try:
        # gbm: the input variables are listed by ``feature_name()``.
        modellen = len(model.feature_name())
    except Exception:
        # ``feature_name()`` is a library call, so any ordinary failure is
        # treated as "count unavailable"; KeyboardInterrupt / SystemExit are
        # no longer swallowed. Report "not equal" explicitly instead of
        # relying on a comparison against NaN.
        return False
    return implen == modellen
示例#7
0
def f_getVarpctboost(model_path, implen):
    """
    Tell whether the number of important variables equals the number of
    input variables of the saved xgboost model.

    :param model_path: path of the saved model, loaded with
        ``ModelUtil.load_bstmodel``
    :param implen: number of important variables
    :return: result of ``implen == <model input-variable count>``; ``False``
        when the count cannot be read from the loaded object
    """
    model = ModelUtil.load_bstmodel(model_path)
    try:
        # xgboost: the input variables are listed in ``feature_names``.
        modellen = len(model.feature_names)
    except (AttributeError, TypeError):
        # AttributeError: the object has no ``feature_names``.
        # TypeError: ``feature_names`` has no length (e.g. it is None).
        # Report "not equal" explicitly instead of comparing against NaN.
        return False
    return implen == modellen
示例#8
0
 def __init__(self, modelname, dummyList, Allcol, bstmodelpath):
     """
     Keep the model metadata and load the trained model.

     :param modelname: name of the model
     :param dummyList: list of dummy variables (per the original note, this
         is also obtained elsewhere in the pipeline)
     :param Allcol: full column set; per the original note it can be built
         with ``f_genAllcol(dummyAfterdf)``
     :param bstmodelpath: path of the saved model
     """
     # Plain metadata, assigned in the same order as before.
     self.modelname, self.dummyList = modelname, dummyList
     self.Allcol, self.bstmodelpath = Allcol, bstmodelpath
     # The model is loaded last, after all metadata attributes are set.
     self.bstmodel = ModelUtil.load_bstmodel(bstmodelpath)
示例#9
0
    for x in ls:
        fill_dict[x] = 0
    # print(fill_dict)
    var = dummy2df.fillna(fill_dict)
    return var


# dummyList = list(set(res[4]) - set(res[5]))
# dummyAfterdf = f_dummyOld(tr, dummyList)
# f_dummyNew(te.head(3), dummyList, f_genAllcol(dummyAfterdf))

if __name__ == '__main__':
    # Manual smoke test for the class that saves a model together with its
    # data-processing information.
    # NOTE(review): every path below is hard-coded to one developer's machine.

    modelpath = r'C:\Users\liyin\Desktop\CcxMLOGE\TestUnit\ccxboost\model20171211190055\modeltxt\model_ccxboost_2017-12-11.txt'
    # Load the saved model and use its feature names as the full column list.
    bst = ModelUtil.load_bstmodel(modelpath)
    Allcol = bst.feature_names
    import numpy as np

    # One-row frame of ones: one column per model feature, row labelled 'All'.
    dd = pd.DataFrame(np.ones(len(Allcol))).T
    dd.columns = Allcol
    dd.index = ['All']
    # Unique prefixes (text before the first '_') of the feature names that
    # contain '_'; presumably the variables that were dummy-encoded -- confirm.
    dummyList = list(set([i.split('_')[0] for i in Allcol if '_' in i]))
    psd = processData('ccxboost', dummyList, dd, modelpath)
    psd.modelname  # bare attribute access; the value is discarded
    psd.save('111', r'C:\Users\liyin\Desktop\CcxMLOGE\TestUnit')

    # Read a pickled object back from disk.
    # NOTE(review): presumably the file written by psd.save above -- confirm.
    # pickle.load can execute arbitrary code; only use it on trusted files.
    path = 'C:/Users/liyin/Desktop/CcxMLOGE/TestUnit/predict/ccxboost111.model'
    with open(path, 'rb') as f:
        psd_1 = pickle.load(f)