Exemplo n.º 1
0
    def __init__(self,
                 nb_ensemble=10,
                 max_features='auto',
                 disable_weighted_vote=False,
                 lambda_value=6,
                 performance_metric='acc',
                 drift_detection_method: BaseDriftDetector = ADWIN(0.001),
                 warning_detection_method: BaseDriftDetector = ADWIN(0.01),
                 max_byte_size=33554432,
                 memory_estimate_period=2000000,
                 grace_period=50,
                 split_criterion='info_gain',
                 split_confidence=0.01,
                 tie_threshold=0.05,
                 binary_split=False,
                 stop_mem_management=False,
                 remove_poor_atts=False,
                 no_preprune=False,
                 leaf_prediction='nba',
                 nb_threshold=0,
                 nominal_attributes=None,
                 random_state=None):
        """AdaptiveRandomForest class constructor.

        Stores the ensemble settings and the Hoeffding Tree settings as
        attributes of the same name. The ensemble itself is left as ``None``
        and the instance / weight counters start at zero.

        A ``drift_detection_method`` or ``warning_detection_method`` that is
        not a ``BaseDriftDetector`` instance is stored as ``None``.

        Raises
        ------
        ValueError
            If ``performance_metric`` is neither 'acc' nor 'kappa'.
        """
        super().__init__()

        # Ensemble-level settings.
        self.nb_ensemble = nb_ensemble
        self.max_features = max_features
        self.disable_weighted_vote = disable_weighted_vote
        self.lambda_value = lambda_value

        # NOTE(review): the default ADWIN instances in the signature are
        # created once, when the function is defined, so every call that
        # relies on the defaults receives the same two detector objects.
        self.drift_detection_method = (
            drift_detection_method
            if isinstance(drift_detection_method, BaseDriftDetector)
            else None)
        self.warning_detection_method = (
            warning_detection_method
            if isinstance(warning_detection_method, BaseDriftDetector)
            else None)

        # Learning state.
        self.instances_seen = 0
        self._train_weight_seen_by_model = 0.0
        self.ensemble = None
        self.random_state = check_random_state(random_state)

        # Only the two supported metric names are accepted.
        if performance_metric not in ('acc', 'kappa'):
            raise ValueError(
                'Invalid performance metric: {}'.format(performance_metric))
        self.performance_metric = performance_metric

        # ARF Hoeffding Tree configuration
        self.max_byte_size = max_byte_size
        self.memory_estimate_period = memory_estimate_period
        self.grace_period = grace_period
        self.split_criterion = split_criterion
        self.split_confidence = split_confidence
        self.tie_threshold = tie_threshold
        self.binary_split = binary_split
        self.stop_mem_management = stop_mem_management
        self.remove_poor_atts = remove_poor_atts
        self.no_preprune = no_preprune
        self.leaf_prediction = leaf_prediction
        self.nb_threshold = nb_threshold
        self.nominal_attributes = nominal_attributes
Exemplo n.º 2
0
 def __adjust_ensemble_size(self):
     """Grow the ensemble until it has as many members as known classes.

     For every class beyond the current ensemble size, a deep copy of the
     base estimator ``self.h`` and a new ``ADWIN(self.delta)`` detector are
     appended, and ``self.ensemble_length`` is incremented. Nothing happens
     when the ensemble is already at least as large as ``self.classes``.
     """
     shortfall = len(self.classes) - len(self.ensemble)
     # range() of a non-positive number is empty, so no explicit guard is
     # needed for the "already large enough" case.
     for _ in range(shortfall):
         self.ensemble.append(cp.deepcopy(self.h))
         self.adwin_ensemble.append(ADWIN(self.delta))
         self.ensemble_length += 1
 def __init__(self, split_test, class_observations):
     """Initialise the split node together with its change-detection state.

     The ``SplitNode`` initialiser is invoked with the given arguments, then
     a fresh ADWIN estimator, an empty alternate tree and a cleared
     error-change flag are attached.
     """
     SplitNode.__init__(self, split_test, class_observations)

     # Change-detection state.
     self.error_change = False
     self._alternate_tree = None
     self._estimation_error_weight = ADWIN()

     self._random_seed = 1
     # NOTE(review): random.seed() seeds the module-level generator and
     # returns None, so this attribute ends up holding None rather than a
     # generator object - confirm what the readers of it expect.
     self._classifier_random = random.seed(self._random_seed)
Exemplo n.º 4
0
    def __init__(self,
                 h=KNN(),
                 ensemble_length=2,
                 w=6,
                 delta=0.002,
                 enable_code_matrix=False,
                 leverage_algorithm='leveraging_bag'):
        """Build the ensemble wrapper and one ADWIN detector per member.

        The base estimator ``h`` is reset and kept as ``self.h``. All
        remaining attributes are first set to ``None`` and the constructor
        arguments are then handed to ``__configure``. Finally
        ``ensemble_length`` ADWIN detectors are created with ``self.delta``.
        """
        super().__init__()

        # NOTE(review): relies on h.reset() returning the estimator itself.
        self.h = h.reset()

        # Placeholders; presumably filled in by __configure below.
        self.ensemble = None
        self.ensemble_length = None
        self.adwin_ensemble = None
        self.n_detected_changes = None
        self.matrix_codes = None
        self.enable_matrix_codes = None
        self.w = None
        self.delta = None
        self.classes = None
        self.leveraging_algorithm = None

        self.__configure(h, ensemble_length, w, delta, enable_code_matrix,
                         leverage_algorithm)
        self.init_matrix_codes = True

        # One change detector for each requested ensemble member.
        self.adwin_ensemble = [
            ADWIN(self.delta) for _ in range(ensemble_length)
        ]
 def __init__(self, split_test, class_observations, size):
     """Initialise the split node together with its change-detection state.

     The ``SplitNode`` initialiser is invoked with the given arguments, then
     a fresh ADWIN estimator, an empty alternate tree and a cleared
     error-change flag are attached.
     """
     SplitNode.__init__(self, split_test, class_observations, size)

     # Change-detection state.
     self.error_change = False
     # Deliberately None rather than a node instance; per the original
     # author's note the alternate tree is created later as a new learning
     # node.
     self._alternate_tree = None
     self._estimation_error_weight = ADWIN()

     self._random_seed = 1
     # NOTE(review): random.seed() seeds the module-level generator and
     # returns None, so this attribute ends up holding None rather than a
     # generator object - confirm what the readers of it expect.
     self._classifier_random = random.seed(self._random_seed)
Exemplo n.º 6
0
 def __init__(self,
              k=5,
              max_window_size=sys.maxsize,
              leaf_size=30,
              categorical_list=None):
     """Create the classifier and attach a fresh ADWIN change detector.

     Parameters
     ----------
     k, max_window_size, leaf_size
         Forwarded unchanged to the parent constructor.
     categorical_list: list, optional
         Forwarded to the parent constructor. When omitted (``None``) a new
         empty list is used for each instance.
     """
     # A literal ``[]`` default would be created once at definition time and
     # shared by every instance built without this argument.
     if categorical_list is None:
         categorical_list = []
     super().__init__(k=k,
                      max_window_size=max_window_size,
                      leaf_size=leaf_size,
                      categorical_list=categorical_list)
     self.adwin = ADWIN()
     self.window = None
Exemplo n.º 7
0
    def __init__(self, h=KNNAdwin(), ensemble_length=2):
        """Build the ensemble wrapper and one ADWIN detector per member.

        The base estimator ``h`` is reset and kept as ``self.h``; ``h`` and
        ``ensemble_length`` are then handed to ``__configure``. Finally
        ``ensemble_length`` default-constructed ADWIN detectors are created.
        """
        super().__init__()

        # Placeholders; presumably filled in by __configure below.
        self.classes = None
        self.ensemble = None
        self.ensemble_length = None

        # NOTE(review): relies on h.reset() returning the estimator itself.
        self.h = h.reset()
        self.__configure(h, ensemble_length)

        # One change detector for each requested ensemble member.
        self.adwin_ensemble = [ADWIN() for _ in range(ensemble_length)]
Exemplo n.º 8
0
 def reset(self):
     """ reset

     Installs a new ADWIN change detector and then delegates the rest of
     the reset to the parent class.

     Returns
     -------
     KNNAdwin
         Whatever the parent class' reset returns (documented as self).

     """
     self.adwin = ADWIN()
     parent_result = super().reset()
     return parent_result
Exemplo n.º 9
0
def test_adwin(test_path):
    """
    ADWIN drift detection test.

    Feeds the stream stored in 'drift_stream.npy' (looked up under
    test_path) to a default ADWIN detector, one element at a time, and
    checks that change is reported at exactly the expected indices.

    According to the original description, the first half of the stream
    holds integers from 0 to 1 and the part from index 999 to 1999 holds
    integers from 0 to 7.

    """
    detector = ADWIN()
    stream = np.load(os.path.join(test_path, 'drift_stream.npy'))
    expected_indices = [1023, 1055, 1087, 1151]

    # Record every position at which the detector reports a change.
    detected_indices = []
    for position in range(stream.size):
        detector.add_element(stream[position])
        if detector.detected_change():
            detected_indices.append(position)

    assert detected_indices == expected_indices
        def learn_from_instance(self, X, y, weight, hat, parent,
                                parent_branch):
            """Update this leaf and its error monitor with one instance.

            Steps, in order:

            1. Draw ``k`` from a Poisson(1) distribution and, when positive,
               multiply ``weight`` by it.
            2. Predict the class of ``X`` from the current class votes and
               feed ADWIN with 0.0 (correct) or 1.0 (wrong).
            3. Set ``self.ErrorChange`` from ADWIN, but clear it again when
               the error estimate went down rather than up.
            4. Delegate the statistics update to the parent class.
            5. Ask ``hat`` to attempt a split once at least
               ``hat.grace_period`` weight has been seen since the last
               split evaluation.

            Parameters
            ----------
            X, y, weight
                The instance, its true class and its weight.
            hat
                The owning tree; provides ``grace_period`` and
                ``_attempt_to_split``.
            parent, parent_branch
                Passed through to ``hat._attempt_to_split``.
            """
            # NOTE(review): the second positional argument of
            # np.random.poisson is the output *size*, not a random generator.
            # This call only yields a scalar while self.classifierRandom is
            # None - confirm against the constructor.
            k = np.random.poisson(1.0, self.classifierRandom)
            if k > 0:
                weight = weight * k

            class_prediction = get_max_value_index(
                self.get_class_votes(X, hat))
            bl_correct = (y == class_prediction)

            if self.estimationErrorWeight is None:
                self.estimationErrorWeight = ADWIN()

            old_error = self.get_error_estimation()

            # Plain truthiness instead of ``is True``: comparing a NumPy
            # label yields a NumPy boolean, which is never identical to the
            # built-in True, so every instance would be recorded as an error.
            self.estimationErrorWeight.add_element(
                0.0 if bl_correct else 1.0)

            # Detect change with ADWIN; a change towards a *lower* error is
            # not treated as a change.
            self.ErrorChange = self.estimationErrorWeight.detected_change()
            if self.ErrorChange and old_error > self.get_error_estimation():
                self.ErrorChange = False

            # Update the leaf statistics through the parent class.
            super().learn_from_instance(X, y, weight, hat)

            # Re-evaluate splitting once enough new weight has accumulated.
            weight_seen = self.get_weight_seen()
            weight_since_check = (
                weight_seen - self.get_weight_seen_at_last_split_evaluation())
            if weight_since_check >= hat.grace_period:
                hat._attempt_to_split(self, parent, parent_branch)
                self.set_weight_seen_at_last_split_evaluation(weight_seen)
Exemplo n.º 11
0
    def reset(self):
        """ reset

        Re-runs the ensemble configuration with the current settings and
        replaces every ADWIN change detector with a new one. The change
        counter and the known classes are cleared, and the code matrix is
        flagged for re-initialisation.

        Returns
        -------
        LeverageBagging
            self

        """
        # Pass the leveraging algorithm as well: the constructor calls
        # __configure with six arguments, and leaving the last one out here
        # would either fail or drop the configured algorithm on reset.
        self.__configure(self.h, self.ensemble_length, self.w, self.delta,
                         self.enable_matrix_codes, self.leveraging_algorithm)
        self.adwin_ensemble = [
            ADWIN(self.delta) for _ in range(self.ensemble_length)
        ]
        self.n_detected_changes = 0
        self.classes = None
        self.init_matrix_codes = True

        return self
Exemplo n.º 12
0
def demo():
    """ _test_adwin

    This demo inserts data into an ADWIN object and prints the indexes at
    which change was detected.

    The data stream is simulated as a sequence of randomly generated 0's
    and 1's. The data from index 999 to 1999 is then replaced by randomly
    generated integers from 0 to 7.

    """
    adwin = ADWIN()
    size = 2000

    # Binary stream whose second half is overwritten with a wider range.
    data_stream = np.random.randint(2, size=size)
    for position in range(999, size):
        data_stream[position] = np.random.randint(8)

    for position, value in enumerate(data_stream):
        adwin.add_element(value)
        if adwin.detected_change():
            print('Change has been detected in data: {} - of index: {}'
                  .format(value, position))
 def __init__(self, initial_class_observations):
     """Initialise the learning node together with its error monitor.

     The ``LearningNodeNBAdaptive`` initialiser is invoked with the given
     observations, then a fresh ADWIN estimator and a cleared error-change
     flag are attached.
     """
     LearningNodeNBAdaptive.__init__(self, initial_class_observations)

     # Change-detection state.
     self.ErrorChange = False
     self.estimationErrorWeight = ADWIN()

     self.randomSeed = 1
     # NOTE(review): random.seed() seeds the module-level generator and
     # returns None, so this attribute ends up holding None rather than a
     # generator object - confirm what the readers of it expect.
     self.classifierRandom = random.seed(self.randomSeed)
        def learn_from_instance(self, X, y, weight, hat, parent,
                                parent_branch):
            """Update this split node, its alternate tree and one child.

            Steps, in order:

            1. Predict the class of ``X`` with the leaf the instance is
               routed to (class 0 when there is no leaf) and feed ADWIN with
               0.0 (correct) or 1.0 (wrong).
            2. Set ``self.error_change`` from ADWIN, but clear it again when
               the error estimate went down rather than up.
            3. On a change, start a new alternate tree. Otherwise, once both
               this node and its alternate tree have an error window wider
               than ``error_width_threshold``, compare their error rates
               against a bound and either promote the alternate tree in
               place of this node or discard it.
            4. Pass the instance on to the alternate tree (if any) and to
               the child branch the instance belongs to.

            Parameters
            ----------
            X, y, weight
                The instance, its true class and its weight.
            hat
                The owning tree; its node factory, root reference and
                counters are updated here.
            parent, parent_branch
                The node holding this node and the branch it hangs on;
                ``parent`` is ``None`` for the root.
            """
            # Route the instance once and reuse the result (the original
            # performed the same lookup twice).
            class_prediction = 0
            leaf = self.filter_instance_to_leaf(X, parent, parent_branch).node
            if leaf is not None:
                class_prediction = get_max_value_index(
                    leaf.get_class_votes(X, hat))

            bl_correct = (y == class_prediction)

            if self._estimation_error_weight is None:
                self._estimation_error_weight = ADWIN()

            old_error = self.get_error_estimation()

            # Plain truthiness instead of ``is True``: comparing a NumPy
            # label yields a NumPy boolean, which is never identical to the
            # built-in True, so every instance would be recorded as an error.
            self._estimation_error_weight.add_element(
                0.0 if bl_correct else 1.0)

            # Detect change with ADWIN; a change towards a *lower* error is
            # not treated as a change.
            self.error_change = self._estimation_error_weight.detected_change()
            if self.error_change and old_error > self.get_error_estimation():
                self.error_change = False

            if self.error_change:
                # Condition to build a new alternate tree.
                self._alternate_tree = hat._new_learning_node()
                hat._alternateTrees += 1

            elif (self._alternate_tree is not None
                  and self._alternate_tree.is_null_error() is False):
                # Condition to replace or prune the alternate tree.
                if (self.get_error_width() > error_width_threshold
                        and self._alternate_tree.get_error_width() >
                        error_width_threshold):
                    old_error_rate = self.get_error_estimation()
                    alt_error_rate = (
                        self._alternate_tree.get_error_estimation())
                    f_delta = .05
                    f_n = (1.0 / self._alternate_tree.get_error_width()
                           + 1.0 / self.get_error_width())

                    bound = math.sqrt(2.0 * old_error_rate *
                                      (1.0 - old_error_rate) *
                                      math.log(2.0 / f_delta) * f_n)

                    if bound < (old_error_rate - alt_error_rate):
                        # The alternate tree is significantly better:
                        # promote it in place of this node.
                        hat._active_leaf_node_cnt -= self.number_leaves()
                        hat._active_leaf_node_cnt += (
                            self._alternate_tree.number_leaves())
                        self.kill_tree_childs(hat)

                        if parent is not None:
                            parent.set_child(parent_branch,
                                             self._alternate_tree)
                        else:
                            # Same attribute as used everywhere else in this
                            # method; the original read a non-matching
                            # ``alternateTree`` name here.
                            hat._tree_root = hat._tree_root._alternate_tree
                        hat._switchAlternateTrees += 1

                    elif bound < (alt_error_rate - old_error_rate):
                        # The alternate tree is significantly worse: drop it.
                        # NOTE(review): the original repeated this exact
                        # isinstance test in an unreachable ``elif``; the
                        # second test was probably meant for another node
                        # type - confirm.
                        if isinstance(self._alternate_tree,
                                      HAT.ActiveLearningNode):
                            self._alternate_tree = None
                        else:
                            self._alternate_tree.kill_tree_childs(hat)
                        # NOTE(review): counter name flagged "to check" by
                        # the original author.
                        hat._prunedalternateTree += 1

            # The alternate tree stands in for this node, so it is reached
            # through the same parent and branch.
            if self._alternate_tree is not None:
                self._alternate_tree.learn_from_instance(
                    X, y, weight, hat, parent, parent_branch)

            child_branch = self.instance_child_index(X)
            child = self.get_child(child_branch)

            # The child hangs on *this* node at ``child_branch``. Passing
            # the grandparent and ``parent_branch`` (as the original did)
            # would make any replacement or split done by the child land on
            # this node's slot instead of the child's.
            if child is not None:
                child.learn_from_instance(X, y, weight, hat, self,
                                          child_branch)
Exemplo n.º 15
0
 def __init__(self, initial_class_observations):
     """Initialise the learning node together with its error monitor.

     The ``LearningNodeNBAdaptive`` initialiser is invoked with the given
     observations, then a fresh ADWIN estimator, a cleared error-change
     flag and a random state derived from the fixed seed 1 are attached.
     """
     LearningNodeNBAdaptive.__init__(self, initial_class_observations)

     # Change-detection state.
     self.ErrorChange = False
     self.estimationErrorWeight = ADWIN()

     # Fixed seed for the node's own random state.
     self._randomSeed = 1
     self._classifier_random = check_random_state(self._randomSeed)
Exemplo n.º 16
0
    def partial_fit(self, X, y, classes=None, weight=None):
        """ partial_fit

        Partially fits the model, based on the X and y matrix.

        Each ensemble member is trained on the given samples K times, where
        K is drawn from a Poisson(1) distribution.

        Alongside updating the model, the learner also updates each member's
        ADWIN detector with that member's prediction errors on the new
        samples (0 for a correct prediction, 1 for a wrong one). If a
        detector reports a change and its error estimate went up, the member
        with the highest estimated error is reset and gets a new detector.

        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            Features matrix used for partially updating the model.

        y: Array-like
            An array-like of all the class labels for the samples in X.

        classes: list
            List of all existing classes. This is an optional parameter, except
            for the first partial_fit call, when it becomes obligatory.

        weight: Array-like
            Instance weight. Passed on to the ensemble members.

        Raises
        ------
        ValueError: A ValueError is raised if the 'classes' parameter is not
        passed in the first partial_fit call, or if they are passed in further
        calls but differ from the initial classes list passed.

        Returns
        -------
        OzaBaggingAdwin
            self

        """
        r, _ = get_dimensions(X)
        if self.classes is None:
            if classes is None:
                raise ValueError("The first partial_fit call should pass all the classes.")
            self.classes = classes

        if classes is not None and set(self.classes) != set(classes):
            raise ValueError(
                "The classes passed to the partial_fit function differ from those passed in an earlier moment.")

        self.__adjust_ensemble_size()
        change_detected = False
        for i in range(self.ensemble_length):
            k = np.random.poisson()
            for _ in range(k):
                # Always hand over the stored classes: a member that was
                # reset after a drift would otherwise receive ``None`` on
                # calls where the caller omits ``classes``.
                self.ensemble[i].partial_fit(X, y, self.classes, weight)

            try:
                pred = self.ensemble[i].predict(X)
                error_estimation = self.adwin_ensemble[i]._estimation
                for j in range(r):
                    if pred[j] is not None:
                        # Feed the *error* indicator. The estimate is
                        # compared and ranked as an error below, so a correct
                        # prediction must contribute 0, not 1.
                        self.adwin_ensemble[i].add_element(
                            0 if pred[j] == y[j] else 1)
                if self.adwin_ensemble[i].detected_change():
                    if self.adwin_ensemble[i]._estimation > error_estimation:
                        change_detected = True
            except ValueError:
                # Best effort: skip the drift check for a member that cannot
                # predict, without discarding a change already reported by
                # an earlier member.
                continue

        if change_detected:
            # Reset the member with the highest estimated error.
            worst_error = 0.0
            worst_index = -1
            for i in range(self.ensemble_length):
                if worst_error < self.adwin_ensemble[i]._estimation:
                    worst_error = self.adwin_ensemble[i]._estimation
                    worst_index = i
            if worst_index != -1:
                self.ensemble[worst_index].reset()
                self.adwin_ensemble[worst_index] = ADWIN()

        return self
Exemplo n.º 17
0
    def __partial_fit(self, X, y):
        """Train every ensemble member on a single sample.

        Steps, in order:

        1. On the first call after a (re)initialisation, build the random
           binary code matrix: one row per member, one column per class,
           with ones and zeros as balanced as the number of classes allows.
        2. For each member, draw the number of training repetitions ``k``
           according to ``self.leveraging_algorithm``, train the member
           ``k`` times, and update the member's ADWIN detector with its
           prediction error (0 for correct, 1 for wrong).
        3. If any detector reported a change with an increased error
           estimate, count the change, reset the member with the highest
           estimated error and give it a new detector.

        Parameters
        ----------
        X
            A single sample (it is wrapped into a one-row array for the
            members).
        y
            The class label of that sample.

        Returns
        -------
        self
        """
        n_classes = len(self.classes)

        if self.init_matrix_codes:
            self.matrix_codes = np.zeros(
                (self.ensemble_length, n_classes), dtype=int)
            # ones - zeros has the same parity as the number of classes, so
            # a perfectly balanced row only exists for an even class count.
            # NOTE(review): the original threshold was
            # ``self.ensemble_length % 2``, which can never be met for an odd
            # class count with an even ensemble size - confirm.
            allowed_imbalance = n_classes % 2
            for i in range(self.ensemble_length):
                # Redraw the row until it is balanced. The original tested
                # the balance *before* the first draw, when both counters
                # were still zero, so no row was ever drawn and the matrix
                # stayed all zeros.
                while True:
                    n_ones = 0
                    for j in range(n_classes):
                        if (j == 1) and (n_classes == 2):
                            # Two classes: the second code is the complement
                            # of the first.
                            result = 1 - self.matrix_codes[i][0]
                        else:
                            result = np.random.randint(2)
                        self.matrix_codes[i][j] = result
                        if result == 1:
                            n_ones += 1
                    n_zeros = n_classes - n_ones
                    if (n_ones - n_zeros) ** 2 <= allowed_imbalance:
                        break
            self.init_matrix_codes = False

        change = False
        X_cp, y_cp = cp.deepcopy(X), cp.deepcopy(y)
        for i in range(self.ensemble_length):
            k = 0.0

            if self.leveraging_algorithm == self.LEVERAGE_ALGORITHMS[0]:
                k = np.random.poisson(self.w)

            elif self.leveraging_algorithm == self.LEVERAGE_ALGORITHMS[1]:
                error = self.adwin_ensemble[i]._estimation
                pred = self.ensemble[i].predict(np.asarray([X]))
                if pred is None:
                    k = 1.0
                elif pred[0] != y:
                    k = 1.0
                # An error estimate of 1.0 would divide by zero; the ratio
                # is unbounded there, so the sample is always used.
                elif error >= 1.0 or np.random.rand() < (error /
                                                         (1.0 - error)):
                    k = 1.0
                else:
                    k = 0.0

            elif self.leveraging_algorithm == self.LEVERAGE_ALGORITHMS[2]:
                w = 1.0
                k = 0.0 if (np.random.randint(2) == 1) else w

            elif self.leveraging_algorithm == self.LEVERAGE_ALGORITHMS[3]:
                w = 1.0
                k = 1.0 + np.random.poisson(w)

            elif self.leveraging_algorithm == self.LEVERAGE_ALGORITHMS[4]:
                w = 1.0
                k = np.random.poisson(1)
                k = w if k > 0 else 0

            # Label this member is trained and scored on. It is derived from
            # the original label on every iteration; the original overwrote
            # one shared variable, so later members indexed the matrix with
            # an earlier member's code.
            if self.enable_matrix_codes:
                target = self.matrix_codes[i][int(y)]
            else:
                target = y_cp

            if k > 0:
                for _ in range(int(k)):
                    self.ensemble[i].partial_fit(np.asarray([X_cp]),
                                                 np.asarray([target]),
                                                 self.classes)

            try:
                pred = self.ensemble[i].predict(np.asarray([X]))
                if pred is not None:
                    # Feed the *error* indicator. The estimate is used as an
                    # error rate above and ranked as an error below, so a
                    # correct prediction must contribute 0, not 1.
                    miss = 0 if (pred[0] == target) else 1
                    error = self.adwin_ensemble[i]._estimation
                    self.adwin_ensemble[i].add_element(miss)
                    if self.adwin_ensemble[i].detected_change():
                        if self.adwin_ensemble[i]._estimation > error:
                            change = True
            except ValueError:
                # Best effort: skip the drift check for a member that cannot
                # predict, without discarding a change already reported by
                # an earlier member.
                continue

        if change:
            self.n_detected_changes += 1
            # Reset the member with the highest estimated error.
            worst_error = 0.0
            worst_index = -1
            for i in range(self.ensemble_length):
                if worst_error < self.adwin_ensemble[i]._estimation:
                    worst_error = self.adwin_ensemble[i]._estimation
                    worst_index = i
            if worst_index != -1:
                self.ensemble[worst_index].reset()
                self.adwin_ensemble[worst_index] = ADWIN(self.delta)
        return self
Exemplo n.º 18
0
 def reset(self):
     """Reconfigure the ensemble and replace every ADWIN detector.

     ``__configure`` is re-run with the current base estimator and ensemble
     length, after which one new default ADWIN detector is created per
     ensemble member.

     Returns
     -------
     self
         Returned so that the result of ``reset()`` can be used directly,
         as the other ``reset`` methods in this code base allow (the
         original returned ``None``).
     """
     self.__configure(self.h, self.ensemble_length)
     self.adwin_ensemble = [ADWIN() for _ in range(self.ensemble_length)]
     return self
Exemplo n.º 19
0
# Imports
import numpy as np
from skmultiflow.classification.core.driftdetection.adwin import ADWIN
adwin = ADWIN()
# Simulate a data stream of uniformly random 0's and 1's
data_stream = np.random.randint(2, size=2000)
# Change the data concept from index 999 onwards: integers from 4 to 7
for i in range(999, 2000):
    data_stream[i] = np.random.randint(4, high=8)
# Feed the stream to ADWIN one element at a time and report every index
# at which it signals a change
for i in range(2000):
    adwin.add_element(data_stream[i])
    if adwin.detected_change():
        print('Change has been detected in data: {} - of index: {}'
              .format(data_stream[i], i))