def choose_root():
    """Select the best unblocked feature and install it as the tree root.

    Every feature of the first entry in ``current_instances`` that is not in
    ``blocked_features`` is scored as
    ``calculate_global_entropy(current_instances) - get_feature_entropy(feature)``
    and the highest-scoring feature becomes the root ``TreeNode``.  One
    non-feature child ``TreeNode`` is created per key returned by
    ``get_feature_attrs`` for that feature, together with a visual ``Node``
    captioned ``"<attr>[<labels[0]>,<labels[1]>]"`` attached under the root's
    visual node.

    Side effects on module-level state:
        * ``tree_nodes`` is reset and refilled with the root and its children.
        * ``tree_root`` is rebound to the new root node.
        * the chosen feature name is appended to ``blocked_features``.
        * the root's visual node is appended to ``visual_nodes``.
        * ``current_instances`` is reset to an empty list.

    Raises:
        IndexError: if ``current_instances`` is empty.
        ValueError: if every feature is already blocked (``max`` of an
            empty mapping).
    """
    # Only rebound names need ``global``; ``blocked_features`` and
    # ``visual_nodes`` are mutated in place.
    global current_instances, tree_nodes, tree_root

    tree_nodes = []

    candidates = [
        feature
        for feature in current_instances[0].feature_dict
        if feature not in blocked_features
    ]

    # The global entropy does not depend on the candidate, so it is computed
    # once instead of once per feature.
    # NOTE(review): assumes calculate_global_entropy has no side effects.
    base_entropy = calculate_global_entropy(current_instances)
    gains = {
        feature: base_entropy - get_feature_entropy(feature)
        for feature in candidates
    }
    best_feature = max(gains, key=gains.get)

    # NOTE(review): the root is created with is_leaf=True although children
    # are attached below -- kept as in the original, confirm intent.
    node = TreeNode(
        parent=None,
        feature_name=str(best_feature),
        children_labels=None,
        children_nodes=None,
        is_leaf=True,
        is_feature=True,
        labels=get_global_labels(),
    )
    root = Node(node.feature_name)
    blocked_features.append(node.feature_name)
    visual_nodes.append(root)
    tree_nodes.append(node)
    tree_root = node

    # Fetch the per-attribute labels once rather than on every iteration.
    # NOTE(review): assumes get_feature_attrs is side-effect free.
    attr_labels = get_feature_attrs(node.feature_name)
    node_children_labels = {}
    node_children_nodes = {}
    for attr in attr_labels:
        child = TreeNode(
            parent=node,
            feature_name=None,
            children_labels=None,
            children_nodes=None,
            is_leaf=True,
            is_feature=False,
            labels=attr_labels[attr],
        )
        # The visual node is created only for its effect of being attached
        # under ``root``; the object itself is not needed afterwards.
        Node(
            str(attr) + "[" + str(child.labels[0]) + ","
            + str(child.labels[1]) + "]",
            parent=root,
        )
        node_children_labels[str(attr)] = child.labels
        node_children_nodes[str(attr)] = child
        tree_nodes.append(child)

    node.children_labels = node_children_labels
    node.children_nodes = node_children_nodes
    current_instances = []
def check_for_pullup():
    """Promote a root-level feature node to tree root when its entropy is lower.

    Walks the direct children of the ``tree_root`` that is current when the
    function is entered.  For each child flagged ``is_feature`` whose
    ``entropy()`` is strictly lower than its parent's, a new parentless
    feature ``TreeNode`` carrying the child's feature name and labels is
    installed as ``tree_root``.  One non-feature child node is created under
    it for every key of the promoted node's ``children_nodes``, and
    ``create_tree`` is then called with the ``seen_instances`` of the root the
    walk started from.

    Side effects: rebinds the module-level ``tree_root``, appends the newly
    created child nodes to ``tree_nodes`` and prints debug output.
    """
    # ``tree_nodes`` is only appended to, so just ``tree_root`` needs ``global``.
    global tree_root

    # Snapshot the starting root and its children.  ``tree_root`` may be
    # rebound inside the loop; re-reading the global on later iterations would
    # index the *new* root's children with the old root's keys.
    original_root = tree_root
    for n in list(original_root.children_nodes.values()):
        now_instances = original_root.seen_instances
        if not n.is_feature:
            continue

        print(n.feature_name)
        if not (n.entropy() < n.parent.entropy()):
            continue

        node = TreeNode(
            parent=None,
            feature_name=str(n.feature_name),
            children_labels=None,
            children_nodes=None,
            is_leaf=True,
            is_feature=True,
            labels=n.labels,
        )
        tree_root = node

        node_children_labels = {}
        node_children_nodes = {}
        for attr in n.children_nodes.keys():
            print(attr)
            temp_node = TreeNode(
                parent=node,
                feature_name=n.parent.feature_name,
                children_labels=None,
                children_nodes=None,
                is_leaf=True,
                is_feature=False,
                labels=get_feature_attrs(node.feature_name)[attr],
            )
            node_children_labels[str(attr)] = temp_node.labels
            node_children_nodes[str(attr)] = temp_node
            tree_nodes.append(temp_node)

        node.children_labels = node_children_labels
        node.children_nodes = node_children_nodes

        # NOTE(review): the original indentation was lost; the rebuild and the
        # separator print are assumed to belong to the pull-up branch.
        create_tree(now_instances)
        print("------------")