def __init__(self, split_test, class_observations):
    SplitNode.__init__(self, split_test, class_observations)
    self._estimation_error_weight = ADWIN()
    self._alternate_tree = None
    self.error_change = False
    self._random_seed = 1
    # random.seed() returns None; keep a seeded RandomState instead
    self._classifier_random = check_random_state(self._random_seed)

def __init__(self, split_test, class_observations, size):
    SplitNode.__init__(self, split_test, class_observations, size)
    self._estimation_error_weight = ADWIN()
    # Deliberately None (not HoeffdingTree.Node()) so the alternate tree is
    # initialized later via _new_learning_node
    self._alternate_tree = None
    self.error_change = False
    self._random_seed = 1
    self._classifier_random = check_random_state(self._random_seed)
Example #3
    def __init__(self,
                 nb_ensemble=10,
                 max_features='auto',
                 disable_weighted_vote=False,
                 lambda_value=6,
                 performance_metric='acc',
                 drift_detection_method: BaseDriftDetector = ADWIN(0.001),
                 warning_detection_method: BaseDriftDetector = ADWIN(0.01),
                 max_byte_size=33554432,
                 memory_estimate_period=2000000,
                 grace_period=50,
                 split_criterion='info_gain',
                 split_confidence=0.01,
                 tie_threshold=0.05,
                 binary_split=False,
                 stop_mem_management=False,
                 remove_poor_atts=False,
                 no_preprune=False,
                 leaf_prediction='nba',
                 nb_threshold=0,
                 nominal_attributes=None,
                 random_state=None):
        """AdaptiveRandomForest class constructor."""
        super().__init__()
        self.nb_ensemble = nb_ensemble
        self.max_features = max_features
        self.disable_weighted_vote = disable_weighted_vote
        self.lambda_value = lambda_value
        if isinstance(drift_detection_method, BaseDriftDetector):
            self.drift_detection_method = drift_detection_method
        else:
            self.drift_detection_method = None
        if isinstance(warning_detection_method, BaseDriftDetector):
            self.warning_detection_method = warning_detection_method
        else:
            self.warning_detection_method = None
        self.instances_seen = 0
        self._train_weight_seen_by_model = 0.0
        self.ensemble = None
        self.random_state = check_random_state(random_state)
        if performance_metric in ['acc', 'kappa']:
            self.performance_metric = performance_metric
        else:
            raise ValueError(
                'Invalid performance metric: {}'.format(performance_metric))

        # ARF Hoeffding Tree configuration
        self.max_byte_size = max_byte_size
        self.memory_estimate_period = memory_estimate_period
        self.grace_period = grace_period
        self.split_criterion = split_criterion
        self.split_confidence = split_confidence
        self.tie_threshold = tie_threshold
        self.binary_split = binary_split
        self.stop_mem_management = stop_mem_management
        self.remove_poor_atts = remove_poor_atts
        self.no_preprune = no_preprune
        self.leaf_prediction = leaf_prediction
        self.nb_threshold = nb_threshold
        self.nominal_attributes = nominal_attributes
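A minimal usage sketch for this constructor, assuming the surrounding class is importable as AdaptiveRandomForest and follows the usual skmultiflow stream-learner API (partial_fit/predict); the synthetic data and the interleaved test-then-train loop are illustrative only:

import numpy as np

# Hypothetical prequential loop; class name and API are assumed
arf = AdaptiveRandomForest(nb_ensemble=10, lambda_value=6,
                           performance_metric='acc')
rng = np.random.RandomState(1)
correct, seen = 0, 0
for _ in range(1000):
    X = rng.rand(1, 5)                    # one sample, five features
    y = np.array([int(X[0, 0] > 0.5)])    # toy concept
    if seen > 0:
        correct += int(arf.predict(X)[0] == y[0])
    arf.partial_fit(X, y, classes=[0, 1])  # test, then train
    seen += 1
print('accuracy: {:.3f}'.format(correct / (seen - 1)))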
Example #4
 def __init__(self,
              k=5,
              max_window_size=sys.maxsize,
              leaf_size=30,
              categorical_list=[]):
     super().__init__(k=k,
                      max_window_size=max_window_size,
                      leaf_size=leaf_size,
                      categorical_list=categorical_list)
     self.adwin = ADWIN()
     self.window = None
Example #5
 def reset(self):
     """ reset
     
     Resets the adwin algorithm as well as the base model 
     kept by the KNN base class.
     
     Returns
     -------
     KNNAdwin
         self
     
     """
     self.adwin = ADWIN()
     return super().reset()
Example #6
 def __adjust_ensemble_size(self):
     # Grow the ensemble (and its ADWIN detectors) when new classes appear
     if len(self.classes) > len(self.ensemble):
         for i in range(len(self.ensemble), len(self.classes)):
             self.ensemble.append(cp.deepcopy(self.h))
             self.adwin_ensemble.append(ADWIN(self.delta))
             self.ensemble_length += 1
Example #7
    def __init__(self,
                 h=KNN(),
                 ensemble_length=2,
                 w=6,
                 delta=0.002,
                 enable_code_matrix=False,
                 leverage_algorithm='leveraging_bag'):

        super().__init__()
        # default values
        self.h = h.reset()
        self.ensemble_length = None
        self.ensemble = None
        self.adwin_ensemble = None
        self.n_detected_changes = None
        self.matrix_codes = None
        self.enable_matrix_codes = None
        self.w = None
        self.delta = None
        self.classes = None
        self.leveraging_algorithm = None
        self.__configure(h, ensemble_length, w, delta, enable_code_matrix,
                         leverage_algorithm)
        self.init_matrix_codes = True

        self.adwin_ensemble = []
        for i in range(ensemble_length):
            self.adwin_ensemble.append(ADWIN(self.delta))
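A sketch of driving a learner built with this constructor, assuming the surrounding class is skmultiflow's LeverageBagging (the name given by the reset docstring in Example #12) with the usual partial_fit/predict API; data is illustrative:

import numpy as np

clf = LeverageBagging(h=KNN(), ensemble_length=2, w=6, delta=0.002,
                      leverage_algorithm='leveraging_bag')  # class name assumed
rng = np.random.RandomState(7)
X = rng.rand(200, 3)
y = (X[:, 0] > 0.5).astype(int)
for i in range(200):
    clf.partial_fit(np.asarray([X[i]]), np.asarray([y[i]]), classes=[0, 1])
print(clf.predict(X[:5]))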
Example #8
def test_adwin(test_path):
    """
    ADWIN drift detection test.
    The first half of the stream contains a sequence corresponding to a normal distribution of integers from 0 to 1.
    From index 999 to 1999 the sequence is a normal distribution of integers from 0 to 7.

    """
    adwin = ADWIN()
    test_file = os.path.join(test_path, 'drift_stream.npy')
    data_stream = np.load(test_file)
    expected_indices = [1023, 1055, 1087, 1151]
    detected_indices = []

    for i in range(data_stream.size):
        adwin.add_element(data_stream[i])
        if adwin.detected_change():
            detected_indices.append(i)

    assert detected_indices == expected_indices
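The drift_stream.npy fixture is not included here; below is a sketch of a stream with the shape the docstring describes (random 0/1 for the first thousand elements, random integers from 0 to 7 afterwards), run through ADWIN. Exact detection indices depend on the generated data, so none are asserted:

import numpy as np
from skmultiflow.classification.core.driftdetection.adwin import ADWIN

rng = np.random.RandomState(42)
stream = np.concatenate([rng.randint(2, size=1000),
                         rng.randint(8, size=1000)])

adwin = ADWIN()
detections = []
for i in range(stream.size):
    adwin.add_element(stream[i])
    if adwin.detected_change():
        detections.append(i)
print(detections)  # detections should cluster shortly after index 999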
        def learn_from_instance(self, X, y, weight, hat, parent,
                                parent_branch):
            true_class = y

            # Poisson(1) online-bagging weight, drawn from the node's seeded RandomState
            k = self.classifierRandom.poisson(1.0)
            if k > 0:
                weight = weight * k

            tmp = self.get_class_votes(X, hat)

            class_prediction = get_max_value_index(tmp)

            bl_correct = (true_class == class_prediction)

            if self.estimationErrorWeight is None:
                self.estimationErrorWeight = ADWIN()

            old_error = self.get_error_estimation()

            # Add element to Adwin
            add = 0.0 if (bl_correct is True) else 1.0

            self.estimationErrorWeight.add_element(add)
            # Detect change with Adwin
            self.ErrorChange = self.estimationErrorWeight.detected_change()

            if (self.ErrorChange is True
                    and old_error > self.get_error_estimation()):
                self.ErrorChange = False

            # Update statistics via LearningNodeNBAdaptive
            super().learn_from_instance(X, y, weight, hat)  # super, not self

            # call ActiveLearningNode
            weight_seen = self.get_weight_seen()

            if (weight_seen - self.get_weight_seen_at_last_split_evaluation()
                    >= hat.grace_period):
                hat._attempt_to_split(self, parent, parent_branch)
                self.set_weight_seen_at_last_split_evaluation(weight_seen)
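The node above follows a pattern that recurs throughout this listing: feed ADWIN a 0/1 error indicator, then treat a detected change as meaningful only when the error estimate did not improve. A standalone sketch of that pattern, using ADWIN's _estimation and detected_change exactly as the snippets do:

from skmultiflow.classification.core.driftdetection.adwin import ADWIN

def update_error_monitor(adwin, correct):
    """Return True only when ADWIN flags a change and error did not improve."""
    old_error = adwin._estimation
    adwin.add_element(0.0 if correct else 1.0)
    # A change is suppressed when the error estimate improved
    return adwin.detected_change() and adwin._estimation >= old_error

monitor = ADWIN()
print(update_error_monitor(monitor, correct=False))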
Example #10
    def __init__(self, h=KNNAdwin(), ensemble_length=2):
        super().__init__()
        # default values
        self.ensemble = None
        self.ensemble_length = None
        self.classes = None
        self.h = h.reset()
        self.__configure(h, ensemble_length)

        self.adwin_ensemble = []
        for i in range(ensemble_length):
            self.adwin_ensemble.append(ADWIN())
Example #11
def demo():
    """ _test_adwin
    
    This demo will insert data into an ADWIN object when will display in which 
    indexes change was detected.
    
    The data stream is simulated as a sequence of randomly generated 0's and 1's. 
    Then the data from indexes 999 to 1999 is changed to a normal distribution of 
    integers from 0 to 7.
    
    """
    adwin = ADWIN()
    size = 2000
    data_stream = np.random.randint(2, size=size)
    for i in range(999, size):
        data_stream[i] = np.random.randint(8)

    for i in range(size):
        adwin.add_element(data_stream[i])
        if adwin.detected_change():
            print('Change has been detected in data: ' + str(data_stream[i]) +
                  ' - of index: ' + str(i))
Example #12
    def reset(self):
        """ reset
        
        Resets all the classifiers, as well as all the ADWIN change
        detectors.
        
        Returns
        -------
        LeverageBagging
            self
        
        """
        self.__configure(self.h, self.ensemble_length, self.w, self.delta, self.enable_matrix_codes)
        self.adwin_ensemble = []
        for i in range(self.ensemble_length):
            self.adwin_ensemble.append(ADWIN(self.delta))
        self.n_detected_changes = 0
        self.classes = None
        self.init_matrix_codes = True

        return self
Example #13
    def __partial_fit(self, X, y):
        n_classes = len(self.classes)
        change = False

        if self.init_matrix_codes:
            self.matrix_codes = np.zeros(
                (self.ensemble_length, len(self.classes)), dtype=int)
            for i in range(self.ensemble_length):
                n_zeros = 0
                n_ones = 0
                while ((n_ones - n_zeros) *
                       (n_ones - n_zeros) > self.ensemble_length % 2):
                    n_zeros = 0
                    n_ones = 0
                    for j in range(len(self.classes)):
                        result = 0
                        if (j == 1) and (len(self.classes) == 2):
                            result = 1 - self.matrix_codes[i][0]
                        else:
                            result = np.random.randint(2)

                        self.matrix_codes[i][j] = result
                        if result == 1:
                            n_ones += 1
                        else:
                            n_zeros += 1
            self.init_matrix_codes = False

        detected_change = False
        X_cp, y_cp = cp.deepcopy(X), cp.deepcopy(y)
        for i in range(self.ensemble_length):
            k = 0.0

            if self.leveraging_algorithm == self.LEVERAGE_ALGORITHMS[0]:
                k = np.random.poisson(self.w)

            elif self.leveraging_algorithm == self.LEVERAGE_ALGORITHMS[1]:
                error = self.adwin_ensemble[i]._estimation
                pred = self.ensemble[i].predict(np.asarray([X]))
                if pred is None:
                    k = 1.0
                elif pred[0] != y:
                    k = 1.0
                elif np.random.rand() < (error / (1.0 - error)):
                    k = 1.0
                else:
                    k = 0.0

            elif self.leveraging_algorithm == self.LEVERAGE_ALGORITHMS[2]:
                w = 1.0
                k = 0.0 if (np.random.randint(2) == 1) else w

            elif self.leveraging_algorithm == self.LEVERAGE_ALGORITHMS[3]:
                w = 1.0
                k = 1.0 + np.random.poisson(w)

            elif self.leveraging_algorithm == self.LEVERAGE_ALGORITHMS[4]:
                w = 1.0
                k = np.random.poisson(1)
                k = w if k > 0 else 0

            if k > 0:
                if self.enable_matrix_codes:
                    y_cp = self.matrix_codes[i][int(y_cp)]
                for _ in range(int(k)):
                    self.ensemble[i].partial_fit(np.asarray([X_cp]),
                                                 np.asarray([y_cp]),
                                                 self.classes)

            try:
                pred = self.ensemble[i].predict(np.asarray([X]))
                if pred is not None:
                    add = 1 if (pred[0] == y_cp) else 0
                    error = self.adwin_ensemble[i]._estimation
                    self.adwin_ensemble[i].add_element(add)
                    if self.adwin_ensemble[i].detected_change():
                        if self.adwin_ensemble[i]._estimation > error:
                            change = True
            except ValueError:
                change = False

        if change:
            self.n_detected_changes += 1
            # Find and reset the worst-performing ensemble member
            max_error = 0.0
            imax = -1
            for i in range(self.ensemble_length):
                if max_error < self.adwin_ensemble[i]._estimation:
                    max_error = self.adwin_ensemble[i]._estimation
                    imax = i
            if imax != -1:
                self.ensemble[imax].reset()
                self.adwin_ensemble[imax] = ADWIN(self.delta)
        return self
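The k computed in the branches above controls how many times each ensemble member trains on the current instance. A small sketch of the weight distributions involved, assuming only NumPy:

import numpy as np

rng = np.random.RandomState(0)
# Leveraging bagging draws Poisson(w) with w > 1 (w=6 by default here),
# so members see each instance several times on average
print(rng.poisson(6, size=10))
# Plain online bagging (Oza) uses Poisson(1) instead
print(rng.poisson(1.0, size=10))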
 def __init__(self, initial_class_observations):
     LearningNodeNBAdaptive.__init__(self, initial_class_observations)
     self.estimationErrorWeight = ADWIN()
     self.ErrorChange = False
     self.randomSeed = 1
     # random.seed() returns None; keep a seeded RandomState instead
     self.classifierRandom = check_random_state(self.randomSeed)
    class AdaLearningNode(LearningNodeNBAdaptive, NewNode):
        def __init__(self, initial_class_observations):
            LearningNodeNBAdaptive.__init__(self, initial_class_observations)
            self.estimationErrorWeight = ADWIN()
            self.ErrorChange = False
            self.randomSeed = 1
            # random.seed() returns None; keep a seeded RandomState instead
            self.classifierRandom = check_random_state(self.randomSeed)

        def calc_byte_size(self):
            byte_size = self.__sizeof__()
            if self.estimationErrorWeight is not None:
                byte_size += self.estimationErrorWeight.get_length_estimation()
            return byte_size

        # Override NewNode
        def number_leaves(self):
            return 1

        # Override NewNode
        def get_error_estimation(self):
            return self.estimationErrorWeight._estimation

        # Override NewNode
        def get_error_width(self):
            return self.estimationErrorWeight._width

        # Override NewNode
        def is_null_error(self):
            return (self.estimationErrorWeight is None)

        def kill_tree_childs(self, hat):
            pass

        # Override NewNode
        def learn_from_instance(self, X, y, weight, hat, parent,
                                parent_branch):
            true_class = y

            # Poisson(1) online-bagging weight, drawn from the node's seeded RandomState
            k = self.classifierRandom.poisson(1.0)
            if k > 0:
                weight = weight * k

            tmp = self.get_class_votes(X, hat)

            class_prediction = get_max_value_index(tmp)

            bl_correct = (true_class == class_prediction)

            if self.estimationErrorWeight is None:
                self.estimationErrorWeight = ADWIN()

            old_error = self.get_error_estimation()

            # Add element to Adwin
            add = 0.0 if (bl_correct is True) else 1.0

            self.estimationErrorWeight.add_element(add)
            # Detect change with Adwin
            self.ErrorChange = self.estimationErrorWeight.detected_change()

            if (self.ErrorChange is True
                    and old_error > self.get_error_estimation()):
                self.ErrorChange = False

            # Update statistics via LearningNodeNBAdaptive
            super().learn_from_instance(X, y, weight, hat)  # super, not self

            # call ActiveLearningNode
            weight_seen = self.get_weight_seen()

            if (weight_seen - self.get_weight_seen_at_last_split_evaluation()
                    >= hat.grace_period):
                hat._attempt_to_split(self, parent, parent_branch)
                self.set_weight_seen_at_last_split_evaluation(weight_seen)

        # Override LearningNodeNBAdaptive
        def get_class_votes(self, X, ht):

            dist = {}
            prediction_option = ht.leaf_prediction

            if prediction_option == MAJORITY_CLASS:  # MC
                dist = self.get_observed_class_distribution()
            elif prediction_option == NAIVE_BAYES:  # NB
                dist = do_naive_bayes_prediction(
                    X, self._observed_class_distribution,
                    self._attribute_observers)
            # NB Adaptive (default): use majority class or naive Bayes,
            # whichever has been more accurate at this leaf so far
            elif self._mc_correct_weight > self._nb_correct_weight:
                dist = self.get_observed_class_distribution()
            else:
                dist = do_naive_bayes_prediction(
                    X, self._observed_class_distribution,
                    self._attribute_observers)

            dist_sum = sum(dist.values())  # sum of all vote weights

            # Scale the votes by the node's squared error estimate
            if dist_sum * self.get_error_estimation() ** 2 > 0.0:
                normalize_values_in_dict(
                    dist_sum * self.get_error_estimation() ** 2, dist)

            return dist

        # Override NewNode, New for option votes
        def filter_instance_to_leaves(self,
                                      X,
                                      split_parent,
                                      parent_branch,
                                      update_splitter_counts,
                                      found_nodes=None):
            if found_nodes is None:
                found_nodes = []
            found_nodes.append(
                HoeffdingTree.FoundNode(self, split_parent, parent_branch))
        def learn_from_instance(self, X, y, weight, hat, parent,
                                parent_branch):

            true_class = y
            class_prediction = 0

            leaf = self.filter_instance_to_leaf(X, parent, parent_branch).node
            if leaf is not None:
                class_prediction = get_max_value_index(
                    leaf.get_class_votes(X, hat))

            bl_correct = (true_class == class_prediction)

            if self._estimation_error_weight is None:
                self._estimation_error_weight = ADWIN()

            old_error = self.get_error_estimation()

            # Add element to Adwin
            add = 0.0 if (bl_correct is True) else 1.0

            self._estimation_error_weight.add_element(add)
            # Detect change with Adwin
            self.error_change = self._estimation_error_weight.detected_change()

            if (self.error_change is True
                    and old_error > self.get_error_estimation()):
                self.error_change = False

            # Build a new alternate tree when a change is detected
            if self.error_change:
                self._alternate_tree = hat._new_learning_node()  # check call to new learning node
                hat._alternateTrees += 1

            # Consider replacing this subtree with its alternate tree
            elif (self._alternate_tree is not None
                  and not self._alternate_tree.is_null_error()):
                if (self.get_error_width() > error_width_threshold
                        and self._alternate_tree.get_error_width() > error_width_threshold):
                    old_error_rate = self.get_error_estimation()
                    alt_error_rate = self._alternate_tree.get_error_estimation()
                    f_delta = 0.05
                    f_n = (1.0 / self._alternate_tree.get_error_width()
                           + 1.0 / self.get_error_width())

                    bound = math.sqrt(2.0 * old_error_rate * (1.0 - old_error_rate)
                                      * math.log(2.0 / f_delta) * f_n)
                    # To check: bound may never be less than (old_error_rate - alt_error_rate)
                    if bound < (old_error_rate - alt_error_rate):
                        hat._active_leaf_node_cnt -= self.number_leaves()
                        hat._active_leaf_node_cnt += self._alternate_tree.number_leaves()
                        self.kill_tree_childs(hat)
                        self.kill_tree_childs(hat)

                        if parent is not None:
                            parent.set_child(parent_branch,
                                             self._alternate_tree)
                        else:
                            hat._tree_root = hat._tree_root._alternate_tree  # attribute is _alternate_tree
                        hat._switchAlternateTrees += 1
                    elif bound < alt_error_rate - old_error_rate:
                        # Alternate tree is significantly worse: prune it
                        if isinstance(self._alternate_tree,
                                      HAT.ActiveLearningNode):
                            self._alternate_tree = None
                        elif isinstance(self._alternate_tree,
                                        HAT.InactiveLearningNode):
                            self._alternate_tree = None
                        else:
                            self._alternate_tree.kill_tree_childs(hat)
                        hat._prunedalternateTree += 1  # hat._pruned_alternate_trees to check

            # Learn_From_Instance alternate Tree and Child nodes
            if self._alternate_tree is not None:
                self._alternate_tree.learn_from_instance(
                    X, y, weight, hat, parent, parent_branch)

            child_branch = self.instance_child_index(X)
            child = self.get_child(child_branch)

            if child is not None:
                child.learn_from_instance(X, y, weight, hat, parent,
                                          parent_branch)
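The switch test above compares the error difference against a Hoeffding-style bound, bound = sqrt(2 e (1 - e) ln(2 / delta) (1/w_alt + 1/w)). A standalone sketch with illustrative numbers:

import math

def switch_bound(old_error_rate, alt_error_width, error_width, f_delta=0.05):
    # Hoeffding-style bound used to compare a subtree against its alternate
    f_n = 1.0 / alt_error_width + 1.0 / error_width
    return math.sqrt(2.0 * old_error_rate * (1.0 - old_error_rate)
                     * math.log(2.0 / f_delta) * f_n)

# The alternate replaces the subtree when its error is lower by more than the bound
print(round(switch_bound(0.30, alt_error_width=400, error_width=400), 3))  # ~0.088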
Example #17
# Imports
import numpy as np
from skmultiflow.classification.core.driftdetection.adwin import ADWIN
adwin = ADWIN()
# Simulate a data stream as a sequence of random 0's and 1's
data_stream = np.random.randint(2, size=2000)
# Change the data concept from index 999 onward: values now in [4, 8)
for i in range(999, 2000):
    data_stream[i] = np.random.randint(4, high=8)
# Adding stream elements to ADWIN and verifying if drift occurred
for i in range(2000):
    adwin.add_element(data_stream[i])
    if adwin.detected_change():
        print('Change has been detected in data: ' + str(data_stream[i]) +
              ' - of index: ' + str(i))
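Note that the import path above comes from an old scikit-multiflow layout; in later releases the detector lives under skmultiflow.drift_detection, so the equivalent import (assuming a recent version) is:

from skmultiflow.drift_detection import ADWIN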
    class AdaSplitNode(SplitNode, NewNode):
        def __init__(self, split_test, class_observations, size):
            SplitNode.__init__(self, split_test, class_observations, size)
            self._estimation_error_weight = ADWIN()
            # Deliberately None (not HoeffdingTree.Node()) so the alternate
            # tree is initialized later via _new_learning_node
            self._alternate_tree = None
            self.error_change = False
            self._random_seed = 1
            # random.seed() returns None; keep a seeded RandomState instead
            self._classifier_random = check_random_state(self._random_seed)

        # Override SplitNode
        def calc_byte_size_including_subtree(self):
            byte_size = self.__sizeof__()
            if self._alternate_tree is not None:
                byte_size += self._alternate_tree.calc_byte_size_including_subtree()
            if self._estimation_error_weight is not None:
                byte_size += self._estimation_error_weight.get_length_estimation()

            for child in self._children:
                if child is not None:
                    byte_size += child.calc_byte_size_including_subtree()

            return byte_size

        # Override NewNode
        def number_leaves(self):
            num_of_leaves = 0
            for child in self._children:
                if child is not None:
                    num_of_leaves += child.number_leaves()

            return num_of_leaves

        # Override NewNode
        def get_error_estimation(self):
            return self._estimation_error_weight._estimation

        # Override NewNode
        def get_error_width(self):
            w = 0.0
            if not self.is_null_error():
                w = self._estimation_error_weight._width

            return w

        # Override NewNode
        def is_null_error(self):
            return (self._estimation_error_weight is None)

        # Override NewNode
        def learn_from_instance(self, X, y, weight, hat, parent,
                                parent_branch):

            true_class = y
            class_prediction = 0

            leaf = self.filter_instance_to_leaf(X, parent, parent_branch).node
            if leaf is not None:
                class_prediction = get_max_value_index(
                    leaf.get_class_votes(X, hat))

            bl_correct = (true_class == class_prediction)

            if self._estimation_error_weight is None:
                self._estimation_error_weight = ADWIN()

            old_error = self.get_error_estimation()

            # Add element to Adwin
            add = 0.0 if (bl_correct is True) else 1.0

            self._estimation_error_weight.add_element(add)
            # Detect change with Adwin
            self.error_change = self._estimation_error_weight.detected_change()

            if (self.error_change is True
                    and old_error > self.get_error_estimation()):
                self.error_change = False

            # Build a new alternate tree when a change is detected
            if self.error_change:
                self._alternate_tree = hat._new_learning_node()  # check call to new learning node
                hat._alternateTrees += 1

            # Consider replacing this subtree with its alternate tree
            elif (self._alternate_tree is not None
                  and not self._alternate_tree.is_null_error()):
                if (self.get_error_width() > error_width_threshold
                        and self._alternate_tree.get_error_width() > error_width_threshold):
                    old_error_rate = self.get_error_estimation()
                    alt_error_rate = self._alternate_tree.get_error_estimation()
                    f_delta = 0.05
                    f_n = (1.0 / self._alternate_tree.get_error_width()
                           + 1.0 / self.get_error_width())

                    bound = math.sqrt(2.0 * old_error_rate * (1.0 - old_error_rate)
                                      * math.log(2.0 / f_delta) * f_n)
                    # To check: bound may never be less than (old_error_rate - alt_error_rate)
                    if bound < (old_error_rate - alt_error_rate):
                        hat._active_leaf_node_cnt -= self.number_leaves()
                        hat._active_leaf_node_cnt += self._alternate_tree.number_leaves()
                        self.kill_tree_childs(hat)
                        self.kill_tree_childs(hat)

                        if parent is not None:
                            parent.set_child(parent_branch,
                                             self._alternate_tree)
                        else:
                            hat._tree_root = hat._tree_root._alternate_tree  # attribute is _alternate_tree
                        hat._switchAlternateTrees += 1
                    elif bound < alt_error_rate - old_error_rate:
                        # Alternate tree is significantly worse: prune it
                        if isinstance(self._alternate_tree,
                                      HAT.ActiveLearningNode):
                            self._alternate_tree = None
                        elif isinstance(self._alternate_tree,
                                        HAT.InactiveLearningNode):
                            self._alternate_tree = None
                        else:
                            self._alternate_tree.kill_tree_childs(hat)
                        hat._prunedalternateTree += 1  # hat._pruned_alternate_trees to check

            # Learn_From_Instance alternate Tree and Child nodes
            if self._alternate_tree is not None:
                self._alternate_tree.learn_from_instance(
                    X, y, weight, hat, parent, parent_branch)

            child_branch = self.instance_child_index(X)
            child = self.get_child(child_branch)

            if child is not None:
                child.learn_from_instance(X, y, weight, hat, parent,
                                          parent_branch)

        # Override NewNode
        def kill_tree_childs(self, hat):
            for child in self._children:
                if child is not None:
                    # Delete alternate tree if it exists
                    if (isinstance(child, HAT.AdaSplitNode)
                            and child._alternate_tree is not None):
                        self._pruned_alternate_trees += 1
                    # Recursive delete of SplitNodes
                    if isinstance(child, HAT.AdaSplitNode):
                        child.kill_tree_childs(hat)

                    if isinstance(child, HAT.ActiveLearningNode):
                        child = None
                        hat._active_leaf_node_cnt -= 1
                    elif isinstance(child, HAT.InactiveLearningNode):
                        child = None
                        hat._inactive_leaf_node_cnt -= 1

        # override NewNode
        def filter_instance_to_leaves(self,
                                      X,
                                      parent,
                                      parent_branch,
                                      update_splitter_counts,
                                      found_nodes=None):
            if found_nodes is None:
                found_nodes = []

            child_index = self.instance_child_index(X)

            if child_index >= 0:
                child = self.get_child(child_index)

                if child is not None:
                    child.filter_instance_to_leaves(X, parent, parent_branch,
                                                    update_splitter_counts,
                                                    found_nodes)
                else:
                    found_nodes.append(
                        HoeffdingTree.FoundNode(None, self, child_index))
            if self._alternate_tree is not None:
                self._alternate_tree.filter_instance_to_leaves(
                    X, self, -999, update_splitter_counts, found_nodes)
Example #19
 def __init__(self, initial_class_observations):
     LearningNodeNBAdaptive.__init__(self, initial_class_observations)
     self.estimationErrorWeight = ADWIN()
     self.ErrorChange = False
     self._randomSeed = 1
     self._classifier_random = check_random_state(self._randomSeed)
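Unlike the random.seed() variants earlier in this listing, this one keeps a usable generator: check_random_state (the scikit-learn/skmultiflow utility) returns a seeded NumPy RandomState, which can later draw the per-instance Poisson weights. A minimal sketch:

from sklearn.utils import check_random_state

rng = check_random_state(1)  # a seeded np.random.RandomState
k = rng.poisson(1.0)         # the weight the learning nodes draw per instance
print(k)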
Example #20
 def reset(self):
     self.__configure(self.h, self.ensemble_length)
     self.adwin_ensemble = []
     for i in range(self.ensemble_length):
         self.adwin_ensemble.append(ADWIN())
Example #21
    def partial_fit(self, X, y, classes=None, weight=None):
        """ partial_fit

        Partially fits the model, based on the X and y matrix.

        Since this is an ensemble learner, if X and y contain more than one
        sample, the algorithm will partial fit the model one sample at a time.

        Each classifier is trained on each sample K times, where K is drawn
        from a Poisson(1) distribution.
        
        Alongside updating the model, the learner will also update ADWIN's 
        statistics over the new samples, so that the change detector can 
        evaluate if a concept drift was detected. In the case drift is detected, 
        the bagging algorithm will find the worst performing classifier and reset 
        its statistics and window.

        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features) 
            Features matrix used for partially updating the model.

        y: Array-like
            An array-like of all the class labels for the samples in X.

        classes: list 
            List of all existing classes. This is an optional parameter, except 
            for the first partial_fit call, when it becomes obligatory.

        weight: Array-like
            Instance weight. If not provided, uniform weights are assumed.

        Raises
        ------
        ValueError: A ValueError is raised if the 'classes' parameter is not 
        passed in the first partial_fit call, or if they are passed in further 
        calls but differ from the initial classes list passed.

        Returns
        -------
        OzaBaggingAdwin
            self

        """
        r, c = get_dimensions(X)
        if self.classes is None:
            if classes is None:
                raise ValueError("The first partial_fit call should pass all the classes.")
            else:
                self.classes = classes

        if self.classes is not None and classes is not None:
            if set(self.classes) != set(classes):
                raise ValueError(
                    "The classes passed to the partial_fit function differ "
                    "from those passed in an earlier moment.")

        self.__adjust_ensemble_size()
        change_detected = False
        for i in range(self.ensemble_length):
            k = np.random.poisson(1.0)  # Poisson(1), as described in the docstring
            if k > 0:
                for b in range(k):
                    self.ensemble[i].partial_fit(X, y, classes, weight)

            try:
                pred = self.ensemble[i].predict(X)
                error_estimation = self.adwin_ensemble[i]._estimation
                for j in range(r):
                    if pred[j] is not None:
                        if pred[j] == y[j]:
                            self.adwin_ensemble[i].add_element(1)
                        else:
                            self.adwin_ensemble[i].add_element(0)
                if self.adwin_ensemble[i].detected_change():
                    if self.adwin_ensemble[i]._estimation > error_estimation:
                        change_detected = True
            except ValueError:
                change_detected = False

        if change_detected:
            # Find and reset the worst-performing ensemble member
            max_error = 0.0
            imax = -1
            for i in range(self.ensemble_length):
                if max_error < self.adwin_ensemble[i]._estimation:
                    max_error = self.adwin_ensemble[i]._estimation
                    imax = i
            if imax != -1:
                self.ensemble[imax].reset()
                self.adwin_ensemble[imax] = ADWIN()

        return self
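A usage sketch for the method above, assuming the surrounding class is OzaBaggingAdwin (the name given in the Returns section), constructed as in Example #10; data is illustrative:

import numpy as np

clf = OzaBaggingAdwin(h=KNNAdwin(), ensemble_length=2)
rng = np.random.RandomState(3)
for i in range(500):
    X = rng.rand(1, 4)
    y = np.array([int(X[0, 0] + X[0, 1] > 1.0)])
    clf.partial_fit(X, y, classes=[0, 1])  # classes are required on the first call
print(clf.predict(rng.rand(3, 4)))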
Example #22
class KNNAdwin(KNN):
    """ K-Nearest Neighbors Classifier with ADWIN Change detector 
    
    This Classifier is an improvement from the regular KNN classifier, 
    as it is resistant to concept drift. It utilises the ADWIN change 
    detector to decide which samples to keep and which ones to forget, 
    and by doing so it regulates the sample window size.
     
    To know more about the ADWIN change detector, please visit 
    skmultiflow.classification.core.driftdetection.adwin

    It uses the regular KNN Classifier as a base class, with the 
    major difference that this class keeps a variable size window, 
    instead of a fixed size one and also it updates the adwin algorithm 
    at each partial_fit call.
    
    Parameters
    ----------
    k: int
        The number of nearest neighbors to search for.
        
    max_window_size: int
        The maximum size of the window storing the last viewed samples.
        
    leaf_size: int
        The maximum number of samples that can be stored in one leaf node,
        which determines the point at which the algorithm switches to a
        brute-force approach. The bigger this number, the faster the tree
        construction, but the slower the queries.
        
    categorical_list: An array-like
        Each entry is the index of a categorical feature; these may be
        used for further filtering.
        
    Raises
    ------
    NotImplementedError: A few of the functions described here are not 
    implemented since they have no application in this context.
    
    ValueError: A ValueError is raised if the predict function is called 
    before at least k samples have been analyzed by the algorithm.
    
    Examples
    --------
    >>> # Imports
    >>> from skmultiflow.classification.lazy.knn_adwin import KNNAdwin
    >>> from skmultiflow.classification.lazy.knn import KNN
    >>> from skmultiflow.data.file_stream import FileStream
    >>> from skmultiflow.options.file_option import FileOption
    >>> # Setting up the stream
    >>> opt = FileOption('FILE', 'OPT_NAME', 'skmultiflow/datasets/covtype.csv', 'csv', False)
    >>> stream = FileStream(opt, -1, 1)
    >>> stream.prepare_for_use()
    >>> # Setting up the KNNAdwin classifier
    >>> knn_adwin = KNNAdwin(k=8, leaf_size=40, max_window_size=2000)
    >>> # Pre training the classifier with 200 samples
    >>> X, y = stream.next_instance(200)
    >>> knn_adwin = knn_adwin.partial_fit(X, y)
    >>> # Keeping track of sample count and correct prediction count
    >>> n_samples = 0
    >>> corrects = 0
    >>> while n_samples < 5000:
    ...     X, y = stream.next_instance()
    ...     pred = knn_adwin.predict(X)
    ...     if y[0] == pred[0]:
    ...         corrects += 1
    ...     knn_adwin = knn_adwin.partial_fit(X, y)
    ...     n_samples += 1
    >>>
    >>> # Displaying the results
    >>> print('KNN usage example')
    KNN usage example
    >>> print(str(n_samples) + ' samples analyzed.')
    5000 samples analyzed.
    >>> print("KNNAdwin's performance: " + str(corrects/n_samples))
    KNNAdwin's performance: 0.7798

    """

    def __init__(self, k=5, max_window_size=sys.maxsize, leaf_size=30, categorical_list=[]):
        super().__init__(k=k, max_window_size=max_window_size, leaf_size=leaf_size, categorical_list=categorical_list)
        self.adwin = ADWIN()
        self.window = None

    def reset(self):
        """ reset
        
        Resets the adwin algorithm as well as the base model 
        kept by the KNN base class.
        
        Returns
        -------
        KNNAdwin
            self
        
        """
        self.adwin = ADWIN()
        return super().reset()

    def partial_fit(self, X, y, classes=None, weight=None):
        """ partial_fit
        
        Partially fits the model. This is done by updating the window 
        with new samples while also updating the adwin algorithm. Then 
        we verify if a change was detected, and if so, the window is 
        correctly split at the drift moment.
        
        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            The data upon which the algorithm will create its model.
            
        y: Array-like
            An array-like containing the classification targets for all 
            samples in X.
            
        classes: Not used.

        weight: Not used.
        
        Returns
        -------
        KNNAdwin
            self
        
        """
        r, c = get_dimensions(X)
        if self.window is None:
            self.window = InstanceWindow(max_size=self.max_window_size)

        for i in range(r):
            self.window.add_element(np.asarray([X[i]]), np.asarray([[y[i]]]))
            if self.window._num_samples >= self.k:
                add = 1 if self.predict(np.asarray([X[i]])) == y[i] else 0
                self.adwin.add_element(add)
            else:
                self.adwin.add_element(0)

        if self.window._num_samples >= self.k:
            changed = self.adwin.detected_change()

            if changed:
                # ADWIN shrank its window: drop the oldest samples until the
                # instance window matches ADWIN's width
                if self.adwin._width < self.window._num_samples:
                    for i in range(self.window._num_samples, self.adwin._width, -1):
                        self.window.delete_element()
        return self
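To see the variable-size window in action, a sketch that feeds KNNAdwin a stream with an abrupt concept change and inspects the window size before and after; window._num_samples is internal state, used here only for illustration:

import numpy as np

knn = KNNAdwin(k=5, max_window_size=1000)
rng = np.random.RandomState(0)
for i in range(2000):
    X = rng.rand(1, 2)
    # The labeling rule inverts at i == 1000 (abrupt concept drift)
    y = np.array([int(X[0, 0] > 0.5) if i < 1000 else int(X[0, 0] <= 0.5)])
    knn.partial_fit(X, y)
    if i in (999, 1999):
        print(i, knn.window._num_samples)  # the window may shrink after the drift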