def _deactivate_learning_node(self, to_deactivate: ActiveLearningNode, parent: SplitNode, parent_branch: int):
    """Freeze a learning leaf so it stops adapting to new samples.

    The active leaf is replaced by an inactive counterpart that keeps the
    statistics gathered so far, and the tree's leaf counters are updated.

    Parameters
    ----------
    to_deactivate: ActiveLearningNode
        The node to deactivate.
    parent: SplitNode
        The node's parent.
    parent_branch: int
        Parent node's branch index.
    """
    stats = to_deactivate.get_observed_class_distribution()
    if self.leaf_prediction != _TARGET_MEAN:
        # Perceptron leaves carry their learned weights over into the
        # frozen replacement.
        frozen = InactiveLearningNodePerceptron(
            stats, to_deactivate.perceptron_weight)
    else:
        frozen = InactiveLearningNodeForRegression(stats)
    # Splice the frozen leaf into the tree at the same position.
    if parent is not None:
        parent.set_child(parent_branch, frozen)
    else:
        self._tree_root = frozen
    self._active_leaf_node_cnt -= 1
    self._inactive_leaf_node_cnt += 1
def _deactivate_learning_node(self, to_deactivate: ActiveLearningNode, parent: SplitNode, parent_branch: int):
    """Deactivate a learning node.

    Replaces the active leaf with an inactive node matching the tree's
    leaf-prediction strategy, then updates the tree's leaf counters.

    Parameters
    ----------
    to_deactivate: ActiveLearningNode
        The node to deactivate.
    parent: SplitNode
        The node's parent.
    parent_branch: int
        Parent node's branch index.

    Raises
    ------
    ValueError
        If ``self.leaf_prediction`` is none of the known strategies.
        Previously this case crashed with an UnboundLocalError on
        ``new_leaf``.
    """
    observed_stats = to_deactivate.get_observed_class_distribution()
    if self.leaf_prediction == _TARGET_MEAN:
        new_leaf = InactiveLearningNodeForRegression(observed_stats)
    elif self.leaf_prediction == _PERCEPTRON:
        new_leaf = InactiveLearningNodePerceptronMultiTarget(
            observed_stats, to_deactivate.perceptron_weight,
            to_deactivate.random_state)
    elif self.leaf_prediction == _ADAPTIVE:
        new_leaf = InactiveLearningNodeAdaptiveMultiTarget(
            observed_stats, to_deactivate.perceptron_weight,
            to_deactivate.random_state)
        # Carry over the faded error estimates the adaptive strategy uses
        # to choose between mean and perceptron predictions.
        new_leaf.fMAE_M = to_deactivate.fMAE_M
        new_leaf.fMAE_P = to_deactivate.fMAE_P
    else:
        # Fail fast with a clear message instead of leaving `new_leaf`
        # unbound and raising a confusing UnboundLocalError below.
        raise ValueError('Invalid leaf_prediction option: {}'.format(
            self.leaf_prediction))
    if parent is None:
        self._tree_root = new_leaf
    else:
        parent.set_child(parent_branch, new_leaf)
    self._active_leaf_node_cnt -= 1
    self._inactive_leaf_node_cnt += 1
def _deactivate_learning_node(self, to_deactivate: ActiveLearningNode, parent: SplitNode, parent_branch: int): """Deactivate a learning node. Parameters ---------- to_deactivate: ActiveLearningNode The node to deactivate. parent: SplitNode The node's parent. parent_branch: int Parent node's branch index. """ new_leaf = self._new_learning_node( to_deactivate.get_observed_class_distribution(), to_deactivate, is_active_node=False) if parent is None: self._tree_root = new_leaf else: parent.set_child(parent_branch, new_leaf) self._active_leaf_node_cnt -= 1 self._inactive_leaf_node_cnt += 1
def _attempt_to_split(self, node: ActiveLearningNode, parent: SplitNode, parent_idx: int): """ Attempt to split a node. If the samples seen so far are not from the same class then: 1. Find split candidates and select the top 2. 2. Compute the Hoeffding bound. 3. If the difference between the top 2 split candidates is larger than the Hoeffding bound: 3.1 Replace the leaf node by a split node. 3.2 Add a new leaf node on each branch of the new split node. 3.3 Update tree's metrics Optional: Disable poor attribute. Depends on the tree's configuration. Parameters ---------- node: ActiveLearningNode The node to evaluate. parent: SplitNode The node's parent. parent_idx: int Parent node's branch index. """ if not node.observed_class_distribution_is_pure(): if self._split_criterion == GINI_SPLIT: split_criterion = GiniSplitCriterion() elif self._split_criterion == INFO_GAIN_SPLIT: split_criterion = InfoGainSplitCriterion() elif self._split_criterion == HELLINGER: split_criterion = HellingerDistanceCriterion() else: split_criterion = InfoGainSplitCriterion() best_split_suggestions = node.get_best_split_suggestions( split_criterion, self) best_split_suggestions.sort(key=attrgetter('merit')) should_split = False if len(best_split_suggestions) < 2: should_split = len(best_split_suggestions) > 0 else: hoeffding_bound = self.compute_hoeffding_bound( split_criterion.get_range_of_merit( node.get_observed_class_distribution()), self.split_confidence, node.get_weight_seen()) best_suggestion = best_split_suggestions[-1] second_best_suggestion = best_split_suggestions[-2] if (best_suggestion.merit - second_best_suggestion.merit > hoeffding_bound or hoeffding_bound < self.tie_threshold ): # best_suggestion.merit > 1e-10 and \ should_split = True if self.remove_poor_atts is not None and self.remove_poor_atts: poor_atts = set() # Scan 1 - add any poor attribute to set for i in range(len(best_split_suggestions)): if best_split_suggestions[i] is not None: split_atts = 
best_split_suggestions[ i].split_test.get_atts_test_depends_on() if len(split_atts) == 1: if best_suggestion.merit - best_split_suggestions[ i].merit > hoeffding_bound: poor_atts.add(int(split_atts[0])) # Scan 2 - remove good attributes from set for i in range(len(best_split_suggestions)): if best_split_suggestions[i] is not None: split_atts = best_split_suggestions[ i].split_test.get_atts_test_depends_on() if len(split_atts) == 1: if best_suggestion.merit - best_split_suggestions[ i].merit < hoeffding_bound: poor_atts.remove(int(split_atts[0])) for poor_att in poor_atts: node.disable_attribute(poor_att) if should_split: split_decision = best_split_suggestions[-1] if split_decision.split_test is None: # Preprune - null wins self._deactivate_learning_node(node, parent, parent_idx) else: new_split = self.new_split_node( split_decision.split_test, node.get_observed_class_distribution()) for i in range(split_decision.num_splits()): new_child = self._new_learning_node( split_decision. resulting_class_distribution_from_split(i)) new_split.set_child(i, new_child) self._active_leaf_node_cnt -= 1 self._decision_node_cnt += 1 self._active_leaf_node_cnt += split_decision.num_splits() if parent is None: self._tree_root = new_split else: parent.set_child(parent_idx, new_split) # Manage memory self.enforce_tracker_limit()
def new_split_node(self, split_test, class_observations):
    """Build and return a new split (decision) node.

    Parameters
    ----------
    split_test
        The test that routes instances into the node's branches.
    class_observations
        Class distribution observed at the node prior to splitting.
    """
    split_node = SplitNode(split_test, class_observations)
    return split_node