def tdidt(self, current_instances, available_attributes):
    """Recursively build a decision (sub)tree by top-down induction (TDIDT).

    The tree is a nested list:

    * attribute node: ``["Attribute", name, value_subtree, ...]``
    * value subtree:  ``["Value", attribute_value, child]``
    * leaf node:      ``["Leaf", label, count, total]``

    Args:
        current_instances: Instances reaching this node. The class label is
            read from the last element of an instance (``instance[-1]``).
        available_attributes: Attributes that may still be split on in this
            branch. The caller's list is NOT modified.

    Returns:
        The attribute node (nested list) rooted at the selected split
        attribute.
    """
    # Work on a private copy: Python passes the list by object reference, so
    # removing the split attribute in place would silently shrink the list
    # owned by the caller.
    available_attributes = list(available_attributes)

    split_attribute = myutils.select_attribute(
        current_instances, available_attributes, self.header)
    # Cannot split on the same attribute twice in a branch.
    available_attributes.remove(split_attribute)
    tree = ["Attribute", split_attribute]

    # Group the instances by the value they take for the split attribute.
    partitions = self.partition_instances(current_instances, split_attribute)

    for attribute_value, partition in partitions.items():
        value_subtree = ["Value", attribute_value]

        if len(partition) == 0:
            # CASE 3: no instances for this value => majority-vote leaf
            # computed over the instances of the current node.
            majority, count, total = myutils.compute_partition_stats(
                current_instances)
            value_subtree.append(["Leaf", majority, count, total])
        elif myutils.all_same_class(partition):
            # CASE 1: every instance has the same label => leaf node.
            value_subtree.append([
                "Leaf",
                partition[0][-1],
                len(partition),
                len(current_instances),
            ])
        elif not available_attributes:
            # CASE 2: nothing left to split on (clash) => majority-vote leaf
            # computed over this partition.
            majority, count, total = myutils.compute_partition_stats(partition)
            value_subtree.append(["Leaf", majority, count, total])
        else:
            # All base cases are false: recurse.
            if len(available_attributes) > self.F:
                # random.sample already returns a new list, so no explicit
                # copy of the population is needed. Only these self.F
                # attributes are handed to the subtree.
                candidates = random.sample(available_attributes, self.F)
            else:
                # Each branch receives its own list so sibling branches can
                # never share state.
                candidates = list(available_attributes)
            value_subtree.append(self.tdidt(partition, candidates))

        tree.append(value_subtree)

    return tree
def tdidt(self, current_instances, available_attributes):
    """Recursively build a decision (sub)tree by top-down induction (TDIDT).

    The tree is a nested list: an attribute node
    ``["Attribute", name, value_subtree, ...]`` whose value subtrees are
    ``["Value", attribute_value, child]``. ``child`` is either a nested
    attribute node or a leaf produced by ``myutils.create_leaf_node``.

    Args:
        current_instances: Instances reaching this node.
        available_attributes: Attributes that may still be split on in this
            branch. The caller's list is NOT modified.

    Returns:
        The attribute node (nested list), or ``None`` when the split
        produces at least one empty partition. ``None`` tells the caller to
        replace this attribute node with a majority-vote leaf; the recursive
        step below does exactly that. The outermost call can return ``None``
        as well, so its caller has to be prepared for it.
    """
    # Work on a private copy: Python passes the list by object reference, so
    # removing the split attribute in place would silently shrink the list
    # owned by the caller.
    available_attributes = list(available_attributes)

    split_attribute = myutils.select_attribute(
        current_instances, available_attributes,
        self.attribute_domains, self.header)
    # Cannot split on the same attribute twice in a branch.
    available_attributes.remove(split_attribute)
    tree = ["Attribute", split_attribute]

    # Group the instances by the value they take for the split attribute.
    partitions = myutils.partition_instances(
        current_instances, split_attribute,
        self.attribute_domains, self.header)

    # CASE 3: an empty partition aborts this attribute node. Checking before
    # the loop yields the same result as aborting mid-loop, but avoids
    # building sibling subtrees that would be discarded anyway.
    # NOTE(review): assumes the myutils helpers have no side effects.
    if any(len(partition) == 0 for partition in partitions.values()):
        return None

    for attribute_value, partition in partitions.items():
        if myutils.all_same_class(partition):
            # CASE 1: every instance has the same label => leaf node.
            child = myutils.create_leaf_node(partition, partitions, case=1)
        elif not available_attributes:
            # CASE 2: nothing left to split on (clash) => majority-vote leaf.
            child = myutils.create_leaf_node(partition, partitions, case=2)
        else:
            # All base cases are false: recurse on a per-branch copy so
            # sibling branches can never share state.
            child = self.tdidt(partition, list(available_attributes))
            if child is None:
                # The deeper split hit CASE 3; fall back to a majority-vote
                # leaf for this partition.
                child = myutils.create_leaf_node(partition, partitions, case=2)

        # Attach in exactly one place so every branch, including the
        # fallback above, ends up in the tree.
        tree.append(["Value", attribute_value, child])

    return tree