def _scale_missing_weights(samples, column, weights, branch):
    """Scale the weight of every sample whose value at ``column`` is "?".

    ``weights[branch]`` is only looked up when such a sample is actually
    found, so ``weights`` is never indexed for data without "?" values.
    """
    for sample in samples:
        if sample.value[column] == "?":
            sample.w *= weights[branch]


def _fallback_leaf(subset, parent_data):
    """Return a leaf for ``subset`` labelled with ``most_class(parent_data)``."""
    leaf = TreeNode(data=subset)
    leaf.category = most_class(parent_data)
    return leaf


def tree_generate(data, tmp_attributes, attributes, optimizer):
    """Recursively build a decision (sub)tree for ``data``.

    Args:
        data: The samples reaching this node. Each sample is read through
            ``sample.value[i]`` and has its ``sample.w`` updated in place
            when the value for the chosen attribute is ``"?"``.
        tmp_attributes: The attributes still available for splitting at
            this node. This list is NOT modified; every child receives its
            own deep copy.
        attributes: The attribute list passed unchanged to every recursive
            call; used to locate the sample column of a discrete attribute.
        optimizer: Callable invoked as
            ``optimizer(data, tmp_attributes, attributes)`` that returns
            ``(attr, weight, data_set)``: the attribute to split on, the
            per-branch weights, and one sample subset per branch.

    Returns:
        A ``TreeNode``. Leaves carry ``data`` and ``category``; inner
        nodes carry ``attribute``, ``weight`` and ``next_node`` (one child
        per entry of ``data_set``; exactly two for a non-discrete
        attribute).
    """
    c, same = same_class(data)
    node = TreeNode()

    # Case 1: same_class() reports a single class for all samples -> leaf.
    if same is True:
        node.data = data
        node.category = c
        return node

    # Case 2: no attribute left to split on, or have_same_attributes()
    # reports true -> leaf labelled with most_class(data).
    if Attribute.empty(tmp_attributes) is True or have_same_attributes(
            data, tmp_attributes, attributes):
        node.data = data
        node.category = most_class(data)
        return node

    # attr may be continuous; in that case attr.value holds a value.
    attr, weight, data_set = optimizer(data, tmp_attributes, attributes)
    node.attribute = attr
    node.weight = weight
    node.next_node = []

    if attr.discrete is True:
        column = find_attr(attr, attributes)
        for branch, subset in enumerate(data_set):
            _scale_missing_weights(subset, column, node.weight, branch)
            if len(subset) == 0:
                node.next_node.append(_fallback_leaf(subset, data))
                continue
            # Each child gets a private copy without the attribute just
            # used. Removing it from the shared list would also hide it
            # (and everything removed deeper in this child's subtree) from
            # the sibling branches and from the caller.
            child_attrs = copy.deepcopy(tmp_attributes)
            Attribute.remove(child_attrs, attr)
            node.next_node.append(
                tree_generate(subset, child_attrs, attributes, optimizer))
    else:
        split_at = find_index(attr.range, attr.value)
        aindex = find_attr(attr, tmp_attributes)
        left_attrs = copy.deepcopy(tmp_attributes)
        right_attrs = copy.deepcopy(tmp_attributes)
        branch_attrs = [left_attrs, right_attrs]
        # The first child keeps the range before the split position, the
        # second child the range after it; the split entry itself is
        # dropped from both.
        left_attrs[aindex].range = left_attrs[aindex].range[:split_at]
        right_attrs[aindex].range = right_attrs[aindex].range[split_at + 1:]
        # An attribute whose range is exhausted cannot be split again.
        if len(left_attrs[aindex].range) == 0:
            Attribute.remove(left_attrs, attr)
        if len(right_attrs[aindex].range) == 0:
            Attribute.remove(right_attrs, attr)
        for branch in range(2):
            subset = data_set[branch]
            # NOTE(review): the discrete branch indexes sample values by
            # the attribute's position in `attributes`, while this branch
            # uses its position in `tmp_attributes`. Confirm the two stay
            # aligned after attributes have been removed.
            _scale_missing_weights(subset, aindex, node.weight, branch)
            if len(subset) == 0:
                node.next_node.append(_fallback_leaf(subset, data))
            else:
                node.next_node.append(
                    tree_generate(subset, branch_attrs[branch], attributes,
                                  optimizer))
    return node