Пример #1
0
def tree_generate(data, tmp_attributes, attributes, optimizer):
    """Recursively build a decision (sub)tree for ``data``.

    Args:
        data: Samples to split. Each sample is read through ``.value``
            (indexed by attribute position) and has its ``.w`` weight
            rescaled when the split attribute's value is ``"?"``.
        tmp_attributes: Attributes still available for splitting on this
            path. This function does not modify it; each child receives
            its own copy.
        attributes: The full attribute list. The position of an attribute
            in this list is used as the index into ``sample.value``.
        optimizer: Callable ``optimizer(data, tmp_attributes, attributes)``
            returning ``(attr, weight, data_set)``: the attribute to split
            on, one weight per branch, and one subset of samples per branch.

    Returns:
        A ``TreeNode``. Leaves carry ``data`` and ``category``; inner nodes
        carry ``attribute``, ``weight`` and ``next_node`` (the children, in
        the same order as ``data_set``).
    """
    c, same = same_class(data)
    node = TreeNode()

    # Case 1: every sample already belongs to one class -> leaf.
    if same is True:
        node.data = data
        node.category = c
        return node

    # Case 2: nothing left to split on, or the samples cannot be told apart
    # by the remaining attributes -> leaf labelled with the majority class.
    if Attribute.empty(tmp_attributes) is True or have_same_attributes(
            data, tmp_attributes, attributes):
        node.data = data
        node.category = most_class(data)
        return node

    # attr may be continuous; in that case attr.value holds the split value.
    attr, weight, data_set = optimizer(data, tmp_attributes, attributes)
    node.attribute = attr
    node.weight = weight
    node.next_node = []

    # Sample values are laid out by the FULL attribute list, so the column
    # must be looked up there and not in the (possibly shorter)
    # tmp_attributes list.
    value_index = find_attr(attr, attributes)

    if attr.discrete is True:
        # A discrete attribute is consumed by the split. Remove it from a
        # copy so the caller's list and sibling branches are unaffected.
        remaining = copy.copy(tmp_attributes)
        Attribute.remove(remaining, attr)
        for branch, subset in enumerate(data_set):
            _scale_missing_weights(subset, value_index, weight, branch)
            node.next_node.append(
                _grow_branch(subset, data, copy.copy(remaining), attributes,
                             optimizer))
    else:
        split_at = find_index(attr.range, attr.value)
        attr_pos = find_attr(attr, tmp_attributes)
        # Each side of the split keeps the attribute, restricted to the
        # candidate values below / above the chosen split point.
        lower_attrs = copy.deepcopy(tmp_attributes)
        upper_attrs = copy.deepcopy(tmp_attributes)
        lower_attrs[attr_pos].range = lower_attrs[attr_pos].range[:split_at]
        upper_attrs[attr_pos].range = (
            upper_attrs[attr_pos].range[split_at + 1:])
        # Drop the attribute on a side that has no candidate values left.
        if len(lower_attrs[attr_pos].range) == 0:
            Attribute.remove(lower_attrs, attr)
        if len(upper_attrs[attr_pos].range) == 0:
            Attribute.remove(upper_attrs, attr)
        for branch, branch_attrs in enumerate((lower_attrs, upper_attrs)):
            subset = data_set[branch]
            _scale_missing_weights(subset, value_index, weight, branch)
            node.next_node.append(
                _grow_branch(subset, data, branch_attrs, attributes,
                             optimizer))
    return node


def _scale_missing_weights(samples, value_index, weights, branch):
    """Multiply ``w`` by the branch weight for samples whose value is "?"."""
    for sample in samples:
        if sample.value[value_index] == "?":
            sample.w *= weights[branch]


def _grow_branch(subset, parent_data, branch_attributes, attributes,
                 optimizer):
    """Return the child node for one branch of a split.

    An empty subset becomes a leaf labelled with the majority class of the
    parent's data; otherwise the subtree is built recursively.
    """
    if len(subset) == 0:
        leaf = TreeNode(data=subset)
        leaf.category = most_class(parent_data)
        return leaf
    return tree_generate(subset, branch_attributes, attributes, optimizer)