def _expand_rule(self, rule):
        """
        If the rule has enough statistics, possible expanding candidates are checked. If the best
        candidate verifies the Hoeffding bound, a new predicate is add to the  rule.
        The rule statistics are update to fit the new description.

        """

        if len(rule.observed_class_distribution) >= 2:
            class_idx = None
            if self.expand_criterion == _HELLINGER:
                split_criterion = HellingerDistanceCriterion()
            elif self.expand_criterion == _INFOGAIN:
                split_criterion = InfoGainExpandCriterion()
            else:
                split_criterion = FoilGainExpandCriterion()
                class_idx = rule.class_idx
            should_expand = False
            best_expand_suggestions = rule.get_best_expand_suggestion(split_criterion, class_idx)
            best_expand_suggestions.sort(key=attrgetter('merit'))

            if len(best_expand_suggestions) < 2:
                should_expand = len(best_expand_suggestions) > 0
            else:
                hoeffding_bound = self.compute_hoeffding_bound(split_criterion.get_range_of_merit(
                    rule.observed_class_distribution), self.expand_confidence, rule.get_weight_seen())
                best_suggestion = best_expand_suggestions[-1]
                second_best_suggestion = best_expand_suggestions[-2]
                if ((best_suggestion.merit - second_best_suggestion.merit) > hoeffding_bound) or \
                        (hoeffding_bound < self.tie_threshold):
                    should_expand = True

                if self.remove_poor_atts is not None and self.remove_poor_atts:
                    poor_atts = set()
                    # Scan 1 - add any poor attribute to set
                    for i in range(len(best_expand_suggestions)):
                        if best_expand_suggestions[i] is not None:
                            split_atts = [best_expand_suggestions[i].att_idx]
                            if len(split_atts) == 1:
                                if best_suggestion.merit - best_expand_suggestions[i].merit > hoeffding_bound:
                                    poor_atts.add(int(split_atts[0]))
                    # Scan 2 - remove good attributes from set
                    for i in range(len(best_expand_suggestions)):
                        if best_expand_suggestions[i] is not None:
                            split_atts = [best_expand_suggestions[i].att_idx]
                            if len(split_atts) == 1:
                                if best_suggestion.merit - best_expand_suggestions[i].merit < hoeffding_bound:
                                    try:
                                        poor_atts.remove(int(split_atts[0]))
                                    except KeyError:
                                        pass
                    for poor_att in poor_atts:
                        rule.disable_attribute(poor_att)

            if should_expand:
                best_suggestion = best_expand_suggestions[-1]
                new_pred = Predicate(best_suggestion.att_idx, best_suggestion.operator, best_suggestion.att_val)
                add_pred = True
                for pred in rule.predicate_set:
                    if (pred.operator == new_pred.operator) and (pred.att_idx == new_pred.att_idx):
                        if pred.operator == "<=":
                            pred.value = min(pred.value, new_pred.value)
                            rule.observed_class_distribution = best_suggestion. \
                                resulting_stats_from_split(0).copy()
                        elif pred.operator == ">":
                            pred.value = max(pred.value, new_pred.value)
                            rule.observed_class_distribution = best_suggestion. \
                                resulting_stats_from_split(1).copy()
                        rule._attribute_observers = {}
                        add_pred = False
                        break

                if add_pred:
                    rule.predicate_set.append(new_pred)
                    rule._attribute_observers = {}
                    rule.observed_class_distribution = {}
                    if new_pred.operator in ["=", "<="]:
                        rule.observed_class_distribution = best_suggestion. \
                            resulting_stats_from_split(0).copy()
                    else:
                        rule.observed_class_distribution = best_suggestion. \
                            resulting_stats_from_split(1).copy()

                    if self.expand_criterion == _FOILGAIN:
                        if not self.ordered_rules:
                            for c in rule.observed_class_distribution.keys():
                                if c != rule.class_idx:
                                    new_rule = copy.deepcopy(rule)
                                    new_rule.class_idx = c
                                    split_criterion = FoilGainExpandCriterion()
                                    should_expand = False
                                    best_expand_suggestions = new_rule.get_best_expand_suggestion(split_criterion, c)
                                    best_expand_suggestions.sort(key=attrgetter('merit'))
                                    if len(best_expand_suggestions) < 2:
                                        should_expand = len(best_expand_suggestions) > 0
                                    else:
                                        hoeffding_bound = self.compute_hoeffding_bound(
                                            split_criterion.get_range_of_merit(
                                                new_rule.observed_class_distribution), self.expand_confidence,
                                            new_rule.get_weight_seen())
                                        best_suggestion = best_expand_suggestions[-1]
                                        second_best_suggestion = best_expand_suggestions[-2]
                                        if ((best_suggestion.merit - second_best_suggestion.merit) > hoeffding_bound) \
                                                or (hoeffding_bound < self.tie_threshold):
                                            should_expand = True

                                        if self.remove_poor_atts is not None and self.remove_poor_atts:
                                            poor_atts = set()
                                            # Scan 1 - add any poor attribute to set
                                            for i in range(len(best_expand_suggestions)):
                                                if best_expand_suggestions[i] is not None:
                                                    split_atts = [best_expand_suggestions[i].att_idx]
                                                    if len(split_atts) == 1:
                                                        if best_suggestion.merit - best_expand_suggestions[i].merit > \
                                                                hoeffding_bound:
                                                            poor_atts.add(int(split_atts[0]))
                                            # Scan 2 - remove good attributes from set
                                            for i in range(len(best_expand_suggestions)):
                                                if best_expand_suggestions[i] is not None:
                                                    split_atts = [best_expand_suggestions[i].att_idx]
                                                    if len(split_atts) == 1:
                                                        if best_suggestion.merit - best_expand_suggestions[i].merit < \
                                                                hoeffding_bound:
                                                            try:
                                                                poor_atts.remove(int(split_atts[0]))
                                                            except KeyError:
                                                                pass
                                            for poor_att in poor_atts:
                                                new_rule.disable_attribute(poor_att)

                                    if should_expand:
                                        best_suggestion = best_expand_suggestions[-1]
                                        new_pred = Predicate(best_suggestion.att_idx, best_suggestion.operator,
                                                             best_suggestion.att_val)
                                        add_pred = True
                                        for pred in new_rule.predicate_set:
                                            if (pred.operator == new_pred.operator) and (
                                                    pred.att_idx == new_pred.att_idx):
                                                if pred.operator == "<=":
                                                    pred.value = min(pred.value, new_pred.value)
                                                    new_rule.observed_class_distribution = best_suggestion. \
                                                        resulting_stats_from_split(0).copy()
                                                elif pred.operator == ">":
                                                    pred.value = max(pred.value, new_pred.value)
                                                    new_rule.observed_class_distribution = best_suggestion. \
                                                        resulting_stats_from_split(1).copy()
                                                new_rule._attribute_observers = {}
                                                add_pred = False
                                                break
                                        if add_pred:
                                            new_rule.predicate_set.append(new_pred)
                                            new_rule._attribute_observers = {}
                                            new_rule.observed_class_distribution = {}
                                            if new_pred.operator in ["=", "<="]:
                                                new_rule.observed_class_distribution = best_suggestion. \
                                                    resulting_stats_from_split(0).copy()
                                            else:
                                                new_rule.observed_class_distribution = best_suggestion. \
                                                    resulting_stats_from_split(1).copy()
                                        self.rule_set.append(copy.deepcopy(new_rule))
    def _create_rule(self):
        """ Create a new rule from the default rule.

        If the default rule has enough statistics, possible expanding candidates are checked.
        If the best candidate verifies the Hoeffding bound, a new rule is created if a one predicate.
        The rule statistics are passed down to the new rule and the default rule is reset.

        """
        if len(self.default_rule.observed_class_distribution) >= 2:
            if self.expand_criterion in [_INFOGAIN, _HELLINGER]:
                if self.expand_criterion == _HELLINGER:
                    expand_criterion = HellingerDistanceCriterion()
                else:
                    expand_criterion = InfoGainExpandCriterion()
                should_expand = False
                best_expand_suggestions = self.default_rule.get_best_expand_suggestion(expand_criterion, None)
                best_expand_suggestions.sort(key=attrgetter('merit'))

                if len(best_expand_suggestions) < 2:
                    should_expand = len(best_expand_suggestions) > 0
                else:
                    hoeffding_bound = self.compute_hoeffding_bound(expand_criterion.get_range_of_merit(
                        self.default_rule.observed_class_distribution), self.expand_confidence,
                        self.default_rule.get_weight_seen())
                    best_suggestion = best_expand_suggestions[-1]
                    second_best_suggestion = best_expand_suggestions[-2]

                    if ((best_suggestion.merit - second_best_suggestion.merit) > hoeffding_bound) or \
                            (hoeffding_bound < self.tie_threshold):
                        should_expand = True

                if should_expand:
                    best_suggestion = best_expand_suggestions[-1]
                    new_pred = Predicate(best_suggestion.att_idx, best_suggestion.operator, best_suggestion.att_val)
                    self.rule_set.append(self.new_rule(None, copy.deepcopy(self.drift_detector), None))
                    self.rule_set[-1].predicate_set.append(new_pred)
                    self.default_rule.restart()
                    if new_pred.operator in ["=", "<="]:
                        self.rule_set[-1].observed_class_distribution = best_suggestion. \
                            resulting_stats_from_split(0).copy()
                        self.default_rule.observed_class_distribution = best_suggestion. \
                            resulting_stats_from_split(1).copy()
                    else:
                        self.rule_set[-1].observed_class_distribution = best_suggestion. \
                            resulting_stats_from_split(1).copy()
                        self.default_rule.observed_class_distribution = best_suggestion. \
                            resulting_stats_from_split(0).copy()
                else:
                    self.default_rule.weight_seen_at_last_expand = self.default_rule.get_weight_seen()
            elif self.expand_criterion == _FOILGAIN:
                expand_criterion = FoilGainExpandCriterion()
                should_expand = False
                for class_idx in self.default_rule.observed_class_distribution.keys():
                    best_expand_suggestions = self.default_rule.get_best_expand_suggestion(expand_criterion, class_idx)
                    best_expand_suggestions.sort(key=attrgetter('merit'))
                    if len(best_expand_suggestions) < 2:
                        should_expand = len(best_expand_suggestions) > 0
                    else:
                        hoeffding_bound = self.compute_hoeffding_bound(expand_criterion.get_range_of_merit(
                            self.default_rule.observed_class_distribution), self.expand_confidence,
                            self.default_rule.get_weight_seen())
                        best_suggestion = best_expand_suggestions[-1]
                        second_best_suggestion = best_expand_suggestions[-2]
                        if ((best_suggestion.merit - second_best_suggestion.merit) > hoeffding_bound) or (
                                hoeffding_bound < self.tie_threshold):
                            should_expand = True
                    if should_expand:
                        best_suggestion = best_expand_suggestions[-1]
                        new_pred = Predicate(best_suggestion.att_idx, best_suggestion.operator, best_suggestion.att_val)
                        self.rule_set.append(self.new_rule(None, copy.deepcopy(self.drift_detector), class_idx))
                        self.rule_set[-1].predicate_set.append(new_pred)
                        if new_pred.operator in ["=", "<="]:
                            self.rule_set[-1].observed_class_distribution = best_suggestion. \
                                resulting_stats_from_split(0).copy()
                        else:
                            self.rule_set[-1].observed_class_distribution = best_suggestion. \
                                resulting_stats_from_split(1).copy()
                if should_expand:
                    self.default_rule.restart()
                else:
                    self.default_rule.weight_seen_at_last_expand = self.default_rule.get_weight_seen()
        else:
            self.default_rule.weight_seen_at_last_expand = self.default_rule.get_weight_seen()
示例#3
0
 def branch_rule(self, branch):
     """Return the predicate for ``branch``: ``==`` for branch 0, ``!=`` for any other branch.

     The predicate is built from this split's attribute index and attribute value.
     """
     if branch == 0:
         operator = '=='
     else:
         operator = '!='
     return Predicate(self._att_idx, operator, self._att_value)
示例#4
0
 def branch_rule(self, branch):
     """Return an equality predicate comparing this split's attribute with ``branch``."""
     att_idx = self._att_idx
     return Predicate(att_idx, '==', branch)
示例#5
0
 def branch_rule(self, branch):
     """Return the threshold predicate for ``branch``.

     Branch 0 yields a ``<`` comparison and any other branch a ``>``
     comparison. The branch that equality belongs to (0 when
     ``self._equals_passes_test`` is truthy, otherwise 1) gets ``=`` appended,
     giving ``<=`` or ``>=``.
     """
     if branch == 0:
         operator = '<'
     else:
         operator = '>'
     if self._equals_passes_test:
         inclusive_branch = 0
     else:
         inclusive_branch = 1
     if branch == inclusive_branch:
         operator = operator + '='
     return Predicate(self._att_idx, operator, self._att_value)
示例#6
0
 def branch_rule(self, branch):
     """Return an equality predicate on the value that ``self._reverse_branch_mapping`` holds for ``branch``."""
     att_idx = self._att_idx
     mapped_value = self._reverse_branch_mapping[branch]
     return Predicate(att_idx, '==', mapped_value)