示例#1
1
def rd_fr_classify(tran_data, test_data):
    '''
    Classify test_data with every tree returned by random_fr(tran_data),
    combine the per-tree outputs by majority vote, and return whatever
    check_accurcy(test_data, votes) yields (presumably an accuracy score;
    confirm against check_accurcy).
    '''
    # One output sequence per tree, in the order the trees are produced.
    per_tree = [tree.classify(test_data) for tree in random_fr(tran_data)]

    # zip(*per_tree) groups the i-th output of every tree together; the most
    # frequent value in each group becomes the i-th voted class.
    voted = [
        collections.Counter(ballot).most_common(1)[0][0]
        for ballot in zip(*per_tree)
    ]

    return check_accurcy(test_data, voted)




if __name__ == '__main__':
    # Input file for this run; "breast-cancer-assignment5.txt" is the
    # alternative data file that was toggled here before.
    dataset = read_data("german-assignment5.txt")
    # DiscType and attrset are bound at module level and not used again in
    # this block -- presumably other functions in this file read them as
    # globals (TODO confirm).
    DiscType = get_disc_val(dataset)
    # list(...) keeps attrset a real list on Python 3 as well as Python 2.
    attrset = list(range(len(dataset[0])))
    # Single-run alternative kept for debugging:
    #   print(rd_fr_classify(dataset, dataset[1:]))
    # The parenthesized form prints the same value on Python 2 and, unlike
    # the print statement, also parses on Python 3.
    print(fcv(dataset, rd_fr_classify))
示例#2
0
    if 1.0 - sum(data_wh[:-1]) < 0:
        print "less than 0-------------------------", 1-sum(data_wh[:-1])
    data_wh[-1] = 1.0 - sum(data_wh[:-1])             #确保权重之和为1
    '''


def get_pre_res(dataset, res_cls):
    '''
    Mark each prediction as correct or incorrect.

    Every row of dataset is paired with the entry of res_cls at the same
    position; the row's last element is compared with that entry. The
    returned list holds 1 where they are equal and 0 where they differ, and
    is as long as the shorter of the two inputs.
    '''
    return [
        1 if row[-1] == predicted else 0
        for row, predicted in zip(dataset, res_cls)
    ]


if __name__ == '__main__':
    # Input file for this run; "german-assignment5.txt" and "test.txt" are
    # the alternative data files that were toggled here before.
    datasets = read_data("breast-cancer-assignment5.txt")
    # DiscType and AttrSet are bound at module level and not used again in
    # this block -- presumably other functions in this file read them as
    # globals (TODO confirm).
    DiscType = get_disc_val(datasets)
    # list(...) keeps AttrSet a real list on Python 3 as well as Python 2.
    AttrSet = list(range(len(datasets[0])))

    # Single-split alternatives kept for debugging:
    #   print(ada_classify(datasets[1:255], datasets[255:]))
    #   print(ada_classify(datasets[1:10], datasets[10:]))

    # The parenthesized form prints the same value on Python 2 and, unlike
    # the print statement, also parses on Python 3.
    print(fcv(datasets, ada_classify))
示例#3
0
    '''
    if 1.0 - sum(data_wh[:-1]) < 0:
        print "less than 0-------------------------", 1-sum(data_wh[:-1])
    data_wh[-1] = 1.0 - sum(data_wh[:-1])             #确保权重之和为1
    '''


def get_pre_res(dataset, res_cls):
    '''
    Build a per-sample hit/miss record for a set of predictions.

    dataset and res_cls are walked in lockstep (stopping at the shorter
    one). For each pair, 1 is recorded when the row's last element equals
    the predicted class and 0 otherwise. The list of records is returned.
    '''
    return [
        1 if sample[-1] == guess else 0
        for sample, guess in zip(dataset, res_cls)
    ]

if __name__ == '__main__':
    # Input file for this run; "german-assignment5.txt" and "test.txt" are
    # the alternative data files that were toggled here before.
    datasets = read_data("breast-cancer-assignment5.txt")
    # DiscType and AttrSet are bound at module level and not used again in
    # this block -- presumably other functions in this file read them as
    # globals (TODO confirm).
    DiscType = get_disc_val(datasets)
    # list(...) keeps AttrSet a real list on Python 3 as well as Python 2.
    AttrSet = list(range(len(datasets[0])))

    # Single-split alternatives kept for debugging:
    #   print(ada_classify(datasets[1:255], datasets[255:]))
    #   print(ada_classify(datasets[1:10], datasets[10:]))

    # The parenthesized form prints the same value on Python 2 and, unlike
    # the print statement, also parses on Python 3.
    print(fcv(datasets, ada_classify))
示例#4
0
    return forests


def rd_fr_classify(tran_data, test_data):
    '''
    Build the trees with random_fr(tran_data), let each tree classify
    test_data, pick the most common output per position across the trees,
    and return the result of check_accurcy(test_data, chosen classes)
    (presumably an accuracy score; confirm against check_accurcy).
    '''
    trees = random_fr(tran_data)
    tree_outputs = [t.classify(test_data) for t in trees]

    # Transpose so each group holds every tree's output for one position,
    # then keep the value Counter reports as most common in that group.
    majority = [
        collections.Counter(group).most_common(1)[0][0]
        for group in zip(*tree_outputs)
    ]

    return check_accurcy(test_data, majority)


if __name__ == '__main__':
    # Input file for this run; "breast-cancer-assignment5.txt" is the
    # alternative data file that was toggled here before.
    dataset = read_data("german-assignment5.txt")
    # DiscType and attrset are bound at module level and not used again in
    # this block -- presumably other functions in this file read them as
    # globals (TODO confirm).
    DiscType = get_disc_val(dataset)
    # list(...) keeps attrset a real list on Python 3 as well as Python 2.
    attrset = list(range(len(dataset[0])))
    # Single-run alternative kept for debugging:
    #   print(rd_fr_classify(dataset, dataset[1:]))
    # The parenthesized form prints the same value on Python 2 and, unlike
    # the print statement, also parses on Python 3.
    print(fcv(dataset, rd_fr_classify))