Пример #1
0
def chuli_file(file_name, M):
    """Cross-validate an AdaBoost classifier on a data file and print its accuracy.

    The file is loaded with ``read_file``, every label is collapsed to +1 / -1,
    and the samples are partitioned by ``fenge_file_for_validation``.  For each
    fold an ``AdaBoost`` object is built and scored on the fold's held-out
    samples.  The per-fold accuracy, the mean accuracy and the population
    standard deviation across the scored folds are printed; nothing is
    returned.

    Args:
        file_name: Passed unchanged to ``read_file``.
        M: Passed unchanged as the last argument of ``AdaBoost``
            (presumably the number of boosting rounds -- confirm against
            the class definition).
    """
    file_list, lable_list, feature_is_discrete = read_file(file_name)
    # Collapse the labels to the two classes +1 / -1.
    lable_list = [1 if lable > 0 else -1 for lable in lable_list]
    # NOTE(review): 10 is presumably the number of folds -- confirm against
    # fenge_file_for_validation.
    fenge = fenge_file_for_validation(file_list, lable_list, 10)

    pre_all = []
    for one_fenge in fenge:
        # Fold layout as used here: [0] held-out samples, [1] their expected
        # labels, [2] and [3] the data / labels handed to the model.
        test_data, test_labels = one_fenge[0], one_fenge[1]
        if len(test_data) == 0:
            # Accuracy is undefined without held-out samples; leave the fold
            # out of the statistics instead of dividing by zero.
            continue

        model = AdaBoost(one_fenge[2], one_fenge[3], feature_is_discrete, M)
        hits = sum(1 for sample, expected in zip(test_data, test_labels)
                   if model.predict(sample) == expected)
        pre = float(hits) / len(test_data)
        pre_all.append(pre)
        # Single-string print: same output under the Python 2 print
        # statement and the Python 3 print function.
        print('precision : %s' % (pre,))

    # Average over the folds that were actually scored rather than assuming
    # there are always exactly ten of them.
    scored = len(pre_all)
    if scored:
        mean_pre = sum(pre_all) / float(scored)
        # Population variance: the squared deviations must be divided by the
        # number of folds before taking the root.
        variance = sum((w - mean_pre) ** 2 for w in pre_all) / scored
    else:
        mean_pre = 0.0
        variance = 0.0
    print('mean precision      : %s' % (mean_pre,))
    print('standard deviation  : %s' % (math.sqrt(variance),))
Пример #2
0
def chuli_file(file_name, t, m):
    """Cross-validate a bagged ensemble of ``RandomForest`` objects and print its accuracy.

    The file is loaded with ``read_file`` and partitioned by
    ``fenge_file_for_validation``.  For each fold, ``t`` models are built,
    each from a bootstrap sample (drawn with replacement, same size as the
    fold's training data).  Every held-out sample is classified by majority
    vote of the ``t`` models.  The per-fold accuracy, the mean accuracy and
    the population standard deviation across the scored folds are printed;
    nothing is returned.

    Args:
        file_name: Passed unchanged to ``read_file``.
        t: Number of ``RandomForest`` objects built per fold; each casts one
            vote per held-out sample.
        m: Passed unchanged as the last argument of ``RandomForest``
            (presumably the number of candidate features per split --
            confirm against the class definition).
    """
    file_list, lable_list, feature_is_discrete = read_file(file_name)
    # NOTE(review): 10 is presumably the number of folds -- confirm against
    # fenge_file_for_validation.
    fenge = fenge_file_for_validation(file_list, lable_list, 10)

    pre_all = []
    for one_fenge in fenge:
        # Fold layout as used here: [0] held-out samples, [1] their expected
        # labels, [2] training samples, [3] training labels.
        test_data, test_labels = one_fenge[0], one_fenge[1]
        train_data, train_labels = one_fenge[2], one_fenge[3]
        if len(test_data) == 0:
            # Accuracy is undefined without held-out samples; leave the fold
            # out of the statistics instead of dividing by zero.
            continue

        # Build the t models, each on its own bootstrap sample.
        n_train = len(train_data)
        forests = []
        for tree_index in range(t):
            # randrange never returns n_train itself, so every pick is a
            # valid index.
            picks = [random.randrange(n_train) for _ in range(n_train)]
            sample_data = [train_data[k] for k in picks]
            sample_labels = [train_labels[k] for k in picks]
            forests.append(RandomForest(sample_data, sample_labels,
                                        feature_is_discrete, [], m))

        # Score the ensemble on the held-out samples.
        hits = 0
        for sample, expected in zip(test_data, test_labels):
            votes = [forest.predict(sample) for forest in forests]
            if votes:
                # Most frequent vote wins; max() keeps the first of several
                # equally frequent votes.  No sentinel value is involved, so
                # a genuine prediction of -99 is counted like any other.
                predict_re = max(votes, key=votes.count)
            else:
                # t == 0: nothing voted; keep the historical placeholder.
                predict_re = -99
            if predict_re == expected:
                hits += 1
        pre = float(hits) / len(test_data)
        pre_all.append(pre)
        # Single-string print: same output under the Python 2 print
        # statement and the Python 3 print function.
        print('precision : %s' % (pre,))

    # Average over the folds that were actually scored rather than assuming
    # there are always exactly ten of them.
    scored = len(pre_all)
    if scored:
        mean_pre = sum(pre_all) / float(scored)
        # Population variance: the squared deviations must be divided by the
        # number of folds before taking the root.
        variance = sum((w - mean_pre) ** 2 for w in pre_all) / scored
    else:
        mean_pre = 0.0
        variance = 0.0
    print('mean precision      : %s' % (mean_pre,))
    print('standard deviation  : %s' % (math.sqrt(variance),))