def ada_classify(tran_data, test_data): ''' 使用训练的adaboost决策树对测试数据进行预测 ''' res_cls = [] sub_tree_wh = [] wh_classifier = adaboost(tran_data, 300) final_cls = [] ''' [ [权重,决策树], ... ] ''' for wh_tree in wh_classifier: sub_tree_wh.append(wh_tree[0]) res_cls.append(wh_tree[1].classify(test_data)) clses_T = map(list, zip(*res_cls)) print "样本长度", len(clses_T), "分类器个数", len(res_cls) for c in clses_T: vote_res = {} for i, wh in zip(c, sub_tree_wh): if i in vote_res: vote_res[i] += wh else: vote_res[i] = wh final_cls.append(max(vote_res, key=vote_res.get)) print "分类结果:", final_cls accurcy = check_accurcy(test_data, final_cls) return accurcy
def ada_classify(tran_data, test_data): ''' 使用训练的adaboost决策树对测试数据进行预测 ''' res_cls = [] sub_tree_wh = [] wh_classifier = adaboost(tran_data,300) final_cls = [] ''' [ [权重,决策树], ... ] ''' for wh_tree in wh_classifier: sub_tree_wh.append(wh_tree[0]) res_cls.append(wh_tree[1].classify(test_data)) clses_T = map(list, zip(*res_cls)) print "样本长度", len(clses_T), "分类器个数", len(res_cls) for c in clses_T: vote_res = {} for i, wh in zip(c, sub_tree_wh): if i in vote_res: vote_res[i] += wh else: vote_res[i] = wh final_cls.append(max(vote_res, key=vote_res.get)) print "分类结果:", final_cls accurcy = check_accurcy(test_data, final_cls) return accurcy
def rd_fr_classify(tran_data, test_data):
    """Classify test_data with a random forest trained on tran_data.

    Each tree in the forest predicts all test samples; a sample's final
    label is the plain (unweighted) majority vote across trees.
    Returns the accuracy of the voted labels against test_data.
    """
    all_predictions = [tree.classify(test_data) for tree in random_fr(tran_data)]
    # Transpose so each row holds every tree's vote for one sample,
    # then take the most common vote per row.
    majority = [
        collections.Counter(row).most_common(1)[0][0]
        for row in map(list, zip(*all_predictions))
    ]
    return check_accurcy(test_data, majority)
self.leaf_err_sum(cur_node, leaf_err_set) leaf_e_sum = sum(leaf_err_set) + 0.5 * len(leaf_err_set) leaf_err_ratio = leaf_e_sum / len(cur_node.dataset) std_dev = np.sqrt(leaf_err_ratio * (1 - leaf_err_ratio)) if leaf_e_sum + std_dev > cur_err_sum: print leaf_e_sum + std_dev, cur_err_sum, " prun!!!!" cur_node.childNode = {} cur_node.cls = get_cls_from_data(cur_node.dataset) else: for _, c in cur_node.childNode.items(): self.__prun_tree(c) def prun_tree(self): self.__prun_tree(self.root) if __name__ == '__main__': #dataset = read_data("test.txt") #dataset = read_data("breast-cancer-assignment5.txt") dataset = read_data("german-assignment5.txt") attr_set = range(len(dataset[0])) DiscType = get_disc_val(dataset) decisin_tree = DecisionTree(dataset[1:],attr_set, DiscType) #decisin_tree.prun_tree() res_cls = decisin_tree.classify(dataset[1:]) #res_cls = decisin_tree.classify(dataset[1:]) #print res_cls acc = check_accurcy(dataset[1:], res_cls) print acc
self.leaf_err_sum(cur_node, leaf_err_set) leaf_e_sum = sum(leaf_err_set) + 0.5 * len(leaf_err_set) leaf_err_ratio = leaf_e_sum / len(cur_node.dataset) std_dev = np.sqrt(leaf_err_ratio * (1 - leaf_err_ratio)) if leaf_e_sum + std_dev > cur_err_sum: print leaf_e_sum + std_dev, cur_err_sum, " prun!!!!" cur_node.childNode = {} cur_node.cls = get_cls_from_data(cur_node.dataset) else: for _, c in cur_node.childNode.items(): self.__prun_tree(c) def prun_tree(self): self.__prun_tree(self.root) if __name__ == '__main__': #dataset = read_data("test.txt") #dataset = read_data("breast-cancer-assignment5.txt") dataset = read_data("german-assignment5.txt") attr_set = range(len(dataset[0])) DiscType = get_disc_val(dataset) decisin_tree = DecisionTree(dataset[1:], attr_set, DiscType) #decisin_tree.prun_tree() res_cls = decisin_tree.classify(dataset[1:]) #res_cls = decisin_tree.classify(dataset[1:]) #print res_cls acc = check_accurcy(dataset[1:], res_cls) print acc