def chuli_file(file_name, M):
    """Cross-validate AdaBoost on a data file and print per-fold scores.

    The file is loaded with ``read_file``, the labels are mapped onto
    {+1, -1}, and the data is split by ``fenge_file_for_validation`` (asked
    for 10 folds).  For every fold an ``AdaBoost`` model is built from
    elements 2 and 3 of the fold (training data / training labels) and
    scored on elements 0 and 1 (test data / test labels).

    The fraction of correctly predicted test samples is printed for each
    fold (labelled "precision"), followed by the mean and the population
    standard deviation of those fractions.

    Args:
        file_name: Path handed unchanged to ``read_file``.
        M: Forwarded unchanged as the last argument of ``AdaBoost``
            (presumably the number of boosting rounds -- confirm against
            the AdaBoost class).

    Returns:
        None.  Results are only printed.
    """
    file_list, lable_list, feature_is_discrete = read_file(file_name)
    # Normalise labels for two-class classification: positive -> 1, else -1.
    lable_list = [1 if lable > 0 else -1 for lable in lable_list]
    folds = fenge_file_for_validation(file_list, lable_list, 10)

    # NOTE: every print below passes ONE pre-formatted string, so the emitted
    # text is the same under the Python 2 print statement and the Python 3
    # print function.
    pre_all = []
    for fold in folds:
        test_data, test_lables = fold[0], fold[1]
        train_data, train_lables = fold[2], fold[3]
        model = AdaBoost(train_data, train_lables, feature_is_discrete, M)

        hits = sum(
            (1.0 for sample, expected in zip(test_data, test_lables)
             if model.predict(sample) == expected),
            0.0,
        )
        n_test = len(test_data)
        # An empty test split scores 0.0 instead of raising ZeroDivisionError.
        pre = hits / n_test if n_test else 0.0
        pre_all.append(pre)
        print('%s %s' % ('precision :', pre))

    n_folds = len(pre_all)
    if not n_folds:
        # No folds were produced, so there is nothing to summarise.
        return

    # Average over the folds actually evaluated rather than a hard-coded 10.
    mean_pre = sum(pre_all) / float(n_folds)
    print('%s %s' % ('mean precision :', mean_pre))

    # Population standard deviation: the squared deviations must be averaged
    # before taking the root (the division was previously missing).
    variance = sum((w - mean_pre) ** 2 for w in pre_all) / float(n_folds)
    print('%s %s' % ('standard deviation :', math.sqrt(variance)))
def _majority_vote(votes):
    """Return the most frequent item of ``votes``; ties go to the earliest.

    Returns None when ``votes`` is empty.  No sentinel value is used, so any
    prediction value (including -99) can take part in the vote.
    """
    if not votes:
        return None
    # max() keeps the first maximal element, which gives first-wins ties.
    # list.count is used so that predictions need not be hashable.
    return max(votes, key=votes.count)


def chuli_file(file_name , t, m):
    """Cross-validate a bagged ensemble of RandomForest models and print scores.

    The file is loaded with ``read_file`` and split by
    ``fenge_file_for_validation`` (asked for 10 folds).  For every fold,
    ``t`` ``RandomForest`` models are built, each from a bootstrap resample
    (drawn with replacement, same size as the training split) of elements
    2 and 3 of the fold.  Each test sample (elements 0 and 1 of the fold) is
    classified by majority vote over the ``t`` models.

    The fraction of correctly predicted test samples is printed for each
    fold (labelled "precision"), followed by the mean and the population
    standard deviation of those fractions.

    Args:
        file_name: Path handed unchanged to ``read_file``.
        t: Number of RandomForest models built per fold.
        m: Forwarded unchanged as the last argument of ``RandomForest``
            (presumably the number of candidate features per split --
            confirm against the RandomForest class).

    Returns:
        None.  Results are only printed.
    """
    file_list, lable_list, feature_is_discrete = read_file(file_name)
    folds = fenge_file_for_validation(file_list, lable_list, 10)

    # NOTE: every print below passes ONE pre-formatted string, so the emitted
    # text is the same under the Python 2 print statement and the Python 3
    # print function.
    pre_all = []
    for fold in folds:
        test_data, test_lables = fold[0], fold[1]
        train_data, train_lables = fold[2], fold[3]
        n_train = len(train_data)

        # Build the t models, each on its own bootstrap sample.
        forest = []
        for _ in range(t):
            # One draw per training row, same draw sequence as before.
            # random.uniform may return its upper end-point, so the index is
            # clamped to stay inside the list.
            picks = [
                min(int(random.uniform(0, n_train)), n_train - 1)
                for _ in range(n_train)
            ]
            sample_data = [train_data[k] for k in picks]
            sample_lables = [train_lables[k] for k in picks]
            forest.append(
                RandomForest(sample_data, sample_lables,
                             feature_is_discrete, [], m)
            )

        # Evaluate on the held-out split.
        hits = 0.0
        for sample, expected in zip(test_data, test_lables):
            votes = [model.predict(sample) for model in forest]
            if _majority_vote(votes) == expected:
                hits += 1.0

        n_test = len(test_data)
        # An empty test split scores 0.0 instead of raising ZeroDivisionError.
        pre = hits / n_test if n_test else 0.0
        pre_all.append(pre)
        print('%s %s' % ('precision :', pre))

    n_folds = len(pre_all)
    if not n_folds:
        # No folds were produced, so there is nothing to summarise.
        return

    # Average over the folds actually evaluated rather than a hard-coded 10.
    mean_pre = sum(pre_all) / float(n_folds)
    print('%s %s' % ('mean precision :', mean_pre))

    # Population standard deviation: the squared deviations must be averaged
    # before taking the root (the division was previously missing).
    variance = sum((w - mean_pre) ** 2 for w in pre_all) / float(n_folds)
    print('%s %s' % ('standard deviation :', math.sqrt(variance)))