Example #1
def decision_tree_learning(examples, attrs, parent_examples=()):
    # Free variables (target, dataset) and helpers (plurality_value,
    # all_same_class, choose_attribute, split_by, remove_all, DecisionFork,
    # DecisionLeaf) come from an enclosing scope, e.g. aima-python's
    # DecisionTreeLearner(dataset) closure.
    if len(examples) == 0:
        return plurality_value(parent_examples)  # no data: majority class of the parent
    if all_same_class(examples):
        return DecisionLeaf(examples[0][target])  # pure node: leaf with that class
    if len(attrs) == 0:
        return plurality_value(examples)  # attributes exhausted: majority class
    A = choose_attribute(attrs, examples)  # attribute with the highest information gain
    tree = DecisionFork(A, dataset.attr_names[A], plurality_value(examples))
    for (v_k, exs) in split_by(A, examples):
        # Recurse on each subset, removing A from the candidate attributes.
        subtree = decision_tree_learning(exs, remove_all(A, attrs), examples)
        tree.add(v_k, subtree)
    return tree
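
A usage sketch, under the assumption that this is the recursive learner nested inside aima-python's DecisionTreeLearner(dataset), which supplies target, dataset, and the helper functions, and that the library's bundled datasets are available:

from learning import DataSet, DecisionTreeLearner  # assumed aima-python imports

iris = DataSet(name='iris')        # loads iris.csv from the bundled aima-data
tree = DecisionTreeLearner(iris)   # runs decision_tree_learning internally
print(tree([5.1, 3.0, 1.1, 0.1]))  # classifies a new flower, e.g. 'setosa'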
Example #2
def set_problem(self, target, inputs=None, exclude=()):
    """
    Set (or change) the target and/or inputs.
    This way, one DataSet can be used multiple ways. If given, inputs
    is a list of attributes to use; alternatively, exclude lists
    attributes to leave out of inputs. Attributes can be specified as
    indices (-n .. n) or by attr_name.
    Also computes the list of possible values, if that wasn't done yet.
    """
    self.target = self.attr_num(target)  # resolve a name or index to an index
    exclude = list(map(self.attr_num, exclude))
    if inputs:
        self.inputs = remove_all(self.target, inputs)
    else:
        self.inputs = [a for a in self.attrs if a != self.target and a not in exclude]
    if not self.values:
        self.update_values()
    self.check_me()  # sanity-check the resulting problem definition
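
A minimal sketch of re-targeting one DataSet at runtime; the toy data, attribute names, and constructor arguments here assume aima-python's DataSet API and are hypothetical:

from learning import DataSet  # assumed aima-python import

# Hypothetical toy data: three attributes plus a class column.
ds = DataSet(examples=[[1, 0, 1, 'yes'], [0, 1, 0, 'no']],
             attr_names=['A', 'B', 'C', 'Class'])
ds.set_problem(target='Class', exclude=['B'])  # re-target by name, drop 'B'
print(ds.target)  # 3 (index of 'Class')
print(ds.inputs)  # [0, 2] (indices of 'A' and 'C')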
Example #3
import math

def information_content(values):
    """Bits needed, on average, to encode the distribution in values (its entropy)."""
    # Drop zero counts (0 * log 0 is taken as 0), then rescale to sum to 1.
    probabilities = normalize(remove_all(0, values))
    return sum(-p * math.log2(p) for p in probabilities)
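
For instance, a 9-vs-5 class split costs about 0.94 bits per example, while an even split costs a full bit. A quick check of the definition above, with stand-in versions of the two utils it relies on (assumed to match aima-python's remove_all and normalize):

def remove_all(item, seq):  # stand-in for utils.remove_all (assumed behavior)
    return [x for x in seq if x != item]

def normalize(dist):        # stand-in for utils.normalize (assumed behavior)
    total = sum(dist)
    return [n / total for n in dist]

print(information_content([9, 5]))  # ~0.940 bits
print(information_content([7, 7]))  # 1.0 bit for a 50/50 split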