Exemplo n.º 1
0
def getMyRFAcclist(treenum_list):
    """Train and evaluate an optimised random forest for each forest size.

    For every entry of ``treenum_list`` this function picks a depth with
    ``getBestDepth``, builds a ``randomForest`` on the training split,
    filters its trees with ``auc_optimization`` followed by
    ``similarity_optimization``, and scores the remaining trees with
    ``accuracy_metric``. Progress and results are printed along the way.

    Args:
        treenum_list: Iterable of tree counts to try, e.g.
            ``[20, 30, 40, 50, 60]``.

    Returns:
        A list holding one accuracy value per entry of ``treenum_list``,
        in the same order.
    """
    seed(1)  # fixed seed so every run produces the same random numbers
    filename = 'DataSet3.csv'  # data set already processed with SMOTE
    min_size = 1
    sample_ratio = 1
    # Keep as small as possible, but int(feature_ratio * (len(train[0]) - 1))
    # must stay greater than 1.
    feature_ratio = 0.3
    same_value = 20  # inner-product difference below which vectors count as similar
    same_rate = 0.63  # tree similarity above which two trees count as similar

    # Load the data; the second return value is not needed here.
    dataset, _features = load_csv(filename)
    # NOTE(review): feature_ratio is passed as the second argument of
    # split_train_test -- confirm this is the intended split parameter.
    traindata, testdata = split_train_test(dataset, feature_ratio)

    # Compare forests built with different numbers of trees.
    acc_num_list = []  # [surviving tree count, accuracy] per forest size
    acc_list = []
    for trees_num in treenum_list:
        # Optimisation 1: search for the best tree depth.
        max_depth = getBestDepth(min_size, sample_ratio, trees_num,
                                 feature_ratio, traindata, testdata)
        print('max_depth is ', max_depth)

        # Initialise and grow the random forest.
        myRF = randomForest(trees_num, max_depth, min_size, sample_ratio,
                            feature_ratio)
        myRF.build_randomforest(traindata)

        print('Tree_number: ', len(myRF.trees))
        # AUC of every tree in the forest.
        auc_list = caculateAUC_1.caculateRFAUC(testdata, myRF.trees)
        # Keep the high-AUC trees as a new forest (AUC optimisation).
        newTempForest = auc_optimization(auc_list, trees_num, myRF.trees)
        # Similarity optimisation.
        myRF.trees = similarity_optimization(newTempForest, same_value,
                                             same_rate)
        # Evaluate on the test split.
        # NOTE(review): testdata[:-1] leaves out the last test row -- confirm
        # that this is intentional.
        acc = myRF.accuracy_metric(testdata[:-1])
        print('myRF1_模型准确率:', acc, '%')
        acc_num_list.append([len(myRF.trees), acc])
        acc_list.append(acc)
    # NOTE(review): the label below is hard-coded and does not reflect the
    # actual contents of treenum_list.
    print('trees_num from 20 to 60: ', acc_num_list)
    return acc_list
Exemplo n.º 2
0
    # print('max_depth is ',max_depth)

    # 初始化随机森林
    myRF = randomForest(trees_num, max_depth, min_size, sample_ratio,
                        feature_ratio)
    # 生成随机森林
    myRF.build_randomforest(traindata)

    print('Tree_number: ', myRF.trees.__len__())
    acc = myRF.accuracy_metric(testdata[:-1])
    print('传统RF模型准确率:', acc, '%')

    # 画出某棵树用以可视化观察(这里是第一棵树)
    # plotTree.creatPlot(myRF.trees[0], features)
    # 计算森林中每棵树的AUC
    auc_list = caculateAUC_1.caculateRFAUC(testdata, myRF.trees)
    # 画出每棵树的auc——柱状图
    # plotTree.plotAUCbar(auc_list.__len__(),auc_list)

    # 选取AUC高的决策数形成新的森林(auc优化)
    newTempForest = auc_optimization(auc_list, trees_num, myRF.trees)
    # 相似度优化
    myRF.trees = similarity_optimization(newTempForest, same_value, same_rate)

    print('优化后Tree_number: ', myRF.trees.__len__())
    # 测试评估
    acc = myRF.accuracy_metric(testdata[:-1])
    # print('优化后模型准确率:', acc, '%')
    print('myRF1_模型准确率:', acc, '%')
    # 画出某棵树用以可视化观察(这里是第一棵树)
    # plotTree.creatPlot(myRF.trees[0], features)