def rd_fr_classify(tran_data, test_data):
    """Classify ``test_data`` by majority vote over a forest built from ``tran_data``.

    Args:
        tran_data: Training records, passed unchanged to ``random_fr``.
        test_data: Records handed to every tree's ``classify`` method and to
            ``check_accurcy``.

    Returns:
        Whatever ``check_accurcy(test_data, voted)`` returns -- presumably the
        accuracy of the voted labels (confirm against ``check_accurcy``).
    """
    forests = random_fr(tran_data)
    # One prediction sequence per tree.
    per_tree = [tree.classify(test_data) for tree in forests]
    # zip(*per_tree) regroups the predictions position by position, so every
    # tuple holds all trees' answers for one position; the most common answer
    # wins the vote.
    voted = [
        collections.Counter(ballots).most_common(1)[0][0]
        for ballots in zip(*per_tree)
    ]
    return check_accurcy(test_data, voted)


if __name__ == '__main__':
    #dataset = read_data("breast-cancer-assignment5.txt")
    dataset = read_data("german-assignment5.txt")
    # NOTE(review): DiscType and attrset are module-level names, presumably
    # read by helpers defined elsewhere in this file -- confirm before renaming.
    DiscType = get_disc_val(dataset)
    attrset = range(len(dataset[0]))
    #forests = random_fr(dataset)
    #accurcy = rd_fr_classify(dataset, dataset[1:])
    #print accurcy
    # The parenthesised single-argument form prints the same on Python 2 and 3.
    print(fcv(dataset, rd_fr_classify))
if 1.0 - sum(data_wh[:-1]) < 0: print "less than 0-------------------------", 1-sum(data_wh[:-1]) data_wh[-1] = 1.0 - sum(data_wh[:-1]) #确保权重之和为1 ''' def get_pre_res(dataset, res_cls): ''' 根据分类结果以及训练集本身的标签,对正确以及错误分类进行统计 ''' pre_statis = [] for d, cls in zip(dataset, res_cls): if d[-1] == cls: pre_statis.append(1) else: pre_statis.append(0) return pre_statis if __name__ == '__main__': #datasets = read_data("german-assignment5.txt") datasets = read_data("breast-cancer-assignment5.txt") #datasets = read_data("test.txt") DiscType = get_disc_val(datasets) AttrSet = range(len(datasets[0])) #print ada_classify(datasets[1:255], datasets[255:]) #print ada_classify(datasets[1:10], datasets[10:]) print fcv(datasets, ada_classify)
# NOTE(review): the string literal below appears to be disabled code (it
# recomputes the last weight so the weights sum to 1); it is left
# byte-for-byte unchanged.
''' if 1.0 - sum(data_wh[:-1]) < 0: print "less than 0-------------------------", 1-sum(data_wh[:-1]) data_wh[-1] = 1.0 - sum(data_wh[:-1]) #确保权重之和为1 '''


def get_pre_res(dataset, res_cls):
    """Mark each prediction as correct (1) or incorrect (0).

    Compares the last element of every record in ``dataset`` (its label)
    with the predicted class at the same position in ``res_cls``.

    Args:
        dataset: Sequence of records whose last element is the label.
        res_cls: Sequence of predicted classes, aligned with ``dataset``.

    Returns:
        A list holding 1 where label and prediction are equal and 0
        otherwise. The inputs are paired with ``zip``, so surplus entries
        of the longer one are ignored.
    """
    return [1 if record[-1] == predicted else 0
            for record, predicted in zip(dataset, res_cls)]


if __name__ == '__main__':
    #datasets = read_data("german-assignment5.txt")
    datasets = read_data("breast-cancer-assignment5.txt")
    #datasets = read_data("test.txt")
    # NOTE(review): DiscType and AttrSet are module-level names, presumably
    # read by helpers defined elsewhere in this file -- confirm before renaming.
    DiscType = get_disc_val(datasets)
    AttrSet = range(len(datasets[0]))
    #print ada_classify(datasets[1:255], datasets[255:])
    #print ada_classify(datasets[1:10], datasets[10:])
    # The parenthesised single-argument form prints the same on Python 2 and 3.
    print(fcv(datasets, ada_classify))
return forests def rd_fr_classify(tran_data, test_data): forests = random_fr(tran_data) res_clses = [] cls = [] for tree in forests: res_clses.append(tree.classify(test_data)) clses_T = map(list, zip(*res_clses)) for c in clses_T: vote_cls = collections.Counter(c).most_common(1)[0][0] cls.append(vote_cls) accurcy = check_accurcy(test_data, cls) return accurcy if __name__ == '__main__': #dataset = read_data("breast-cancer-assignment5.txt") dataset = read_data("german-assignment5.txt") DiscType = get_disc_val(dataset) attrset = range(len(dataset[0])) #forests = random_fr(dataset) #accurcy = rd_fr_classify(dataset, dataset[1:]) #print accurcy print fcv(dataset, rd_fr_classify)