Example no. 3
    def __init__(self, window_size=100, max_models=100):
        self.H = []
        self.h = None
        self.window_size = window_size
        self.max_models = max_models
        self.window = InstanceWindow(window_size)
        self.j = 0
Example no. 4
# Imports assumed by these examples (a sketch; exact module paths depend on
# the skmultiflow version they target -- the docstrings below reference
# skmultiflow.core.utils.data_structures for InstanceWindow):
import numpy as np
from collections import Counter
from sklearn.tree import DecisionTreeClassifier
from skmultiflow.core.utils.data_structures import InstanceWindow
from skmultiflow.core.utils.utils import get_dimensions

class BatchClassifier:
    def __init__(self, window_size=100, max_models=10):
        self.H = []
        self.h = None
        self.window_size = window_size
        self.window = InstanceWindow(max_size=window_size, dtype=float)
        self.max_models = max_models

    def partial_fit(self, X, y=None, classes=None):
        r, c = get_dimensions(X)
        if self.window is None:
            self.window = InstanceWindow(max_size=self.window_size,
                                         dtype=float)
        if not self.H:
            # First call: slice the pretraining set into window-sized batches
            # and fit one tree per batch (debut/fin = start/end of a slice)
            modeles = 0
            debut = 0
            fin = self.window_size
            while modeles < self.max_models:
                X_batch = X[debut:fin, :]
                y_batch = y[debut:fin]
                if X_batch.shape[0] == 0:
                    break  # pretraining set exhausted before max_models
                debut += self.window_size
                fin += self.window_size
                self.h = DecisionTreeClassifier()
                self.h.fit(X_batch, y_batch)
                self.H.append(self.h)  # <-- and append it to the ensemble
                modeles += 1
        else:
            # Later calls: push the new rows into the window, then fit one
            # new tree on the window's contents
            for i in range(r):
                self.window.add_element(np.asarray([X[i]]),
                                        np.asarray([[y[i]]]))
            self.h = DecisionTreeClassifier()
            self.h.fit(self.window.get_attributes_matrix(),
                       self.window.get_targets_matrix())
            if len(self.H) == self.max_models:
                self.H.pop(0)  # keep at most max_models trees
            self.H.append(self.h)
        return self

    def predict(self, X):
        N, _ = X.shape
        predictions = []
        y = []
        for h in self.H:
            y.append(h.predict(X))
        for i in range(N):
            votes = Counter([j[i] for j in y])
            if not votes:
                # No model trained yet: default to class zero
                predictions.append(0)
            else:
                predictions.append(max(votes, key=votes.get))
        return predictions
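A minimal driver sketch for the class above (hypothetical usage, assuming the imports listed with the class; synthetic two-class data stands in for a stream):

import numpy as np

rng = np.random.RandomState(42)
X_pre = rng.rand(1000, 5)                    # 10 batches of window_size=100
y_pre = (X_pre[:, 0] > 0.5).astype(int)

clf = BatchClassifier(window_size=100, max_models=10)
clf.partial_fit(X_pre, y_pre)                # first call slices the pretraining set

X_new = rng.rand(100, 5)
y_new = (X_new[:, 0] > 0.5).astype(int)
clf.partial_fit(X_new, y_new)                # later calls: window update + one new tree
print(clf.predict(X_new[:5]))                # majority vote across the ensemble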
Example no. 5
class BatchClassifier:
    def __init__(self, window_size=100, max_models=10):
        self.H = []
        self.h = None
        self.window_size = window_size
        self.window = InstanceWindow(max_size=window_size, dtype=float)
        self.num_models = max_models

    def partial_fit(self, X, y=None, classes=None):
        # Update window with new data
        r, c = get_dimensions(X)

        if self.window is None:
            self.window = InstanceWindow(max_size=self.window_size)

        for i in range(r):
            self.window.add_element(np.asarray([X[i]]), np.asarray([[y[i]]]))
            # If window is full, create and train new Decision Tree
            if self.window._num_samples == self.window_size:
                self.h = DecisionTreeClassifier()
                self.h.fit(self.window.get_attributes_matrix(),
                           self.window.get_targets_matrix())
                # Add new Decision Tree to model set
                self._add_to_buffer(self.h)
                # Clear window
                self.window = InstanceWindow(max_size=self.window_size,
                                             dtype=float)
        return self

    def predict(self, X):
        N, D = X.shape

        # Check there is at least one fitted Decision Tree
        if len(self.H) == 0:
            return np.zeros(N, dtype=int)  # no model yet: default to class zero

        # Majority class via argmax over the averaged class probabilities
        maj = np.argmax(self._predict_proba(X), axis=1)
        return maj

    def _predict_proba(self, X):
        avg = np.average(np.asarray([clf.predict_proba(X) for clf in self.H]),
                         axis=0)
        return avg

    def _add_to_buffer(self, item):
        if len(self.H) == self.num_models:
            self.H.pop(0)
        self.H.append(item)
        return self
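The predict/_predict_proba pair above implements soft voting: per-class probabilities are averaged across ensemble members and argmax picks the winner. A standalone sketch of that step (illustrative only; note it assumes all members saw the same class set, otherwise the predict_proba arrays would differ in width):

import numpy as np
from sklearn.tree import DecisionTreeClassifier

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0, 0, 1, 1])
H = [DecisionTreeClassifier(max_depth=d).fit(X, y) for d in (1, 2)]

# Average the (n_samples, n_classes) probability arrays across members
avg = np.average(np.asarray([h.predict_proba(X) for h in H]), axis=0)
print(np.argmax(avg, axis=1))   # per-sample ensemble prediction: [0 0 1 1]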
Example no. 6
    def __init__(self, k=5, max_window_size=1000, leaf_size=30,
                 categorical_list=None):
        super().__init__()
        self.k = k
        self.max_window_size = max_window_size
        self.c = 0
        self.window = InstanceWindow(max_size=max_window_size, dtype=float)
        self.first_fit = True
        self.classes = []
        self.leaf_size = leaf_size
        # None instead of a mutable [] default
        self.categorical_list = categorical_list if categorical_list is not None else []
Example no. 7
    def partial_fit(self, X, y, classes=None, weight=None):
        """ partial_fit
        
        Partially fits the model. This is done by updating the window 
        with new samples while also updating the adwin algorithm. Then 
        we verify if a change was detected, and if so, the window is 
        correctly split at the drift moment.
        
        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            The data upon which the algorithm will create its model.
            
        y: Array-like
            An array-like containing the classification targets for all 
            samples in X.
            
        classes: Not used.

        weight: Not used.
        
        Returns
        -------
        KNNAdwin
            self
        
        """
        r, c = get_dimensions(X)
        if self.window is None:
            self.window = InstanceWindow(max_size=self.max_window_size)

        for i in range(r):
            # Same insertion regardless of batch size
            self.window.add_element(np.asarray([X[i]]),
                                    np.asarray([[y[i]]]))
            if self.window._num_samples >= self.k:
                add = 1 if self.predict(np.asarray([X[i]])) == y[i] else 0
                self.adwin.add_element(add)
            else:
                self.adwin.add_element(0)

        if self.window._num_samples >= self.k:
            changed = self.adwin.detected_change()

            if changed:
                if self.adwin._width < self.window._num_samples:
                    for i in range(self.window._num_samples, self.adwin._width,
                                   -1):
                        self.window.delete_element()
        return self
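The ADWIN protocol this method relies on, in isolation: each prediction is turned into a 0/1 correctness bit, fed to the detector, and detected_change() reports whether the last element triggered a drift. A sketch using the same calls as above:

adwin = ADWIN()
stream = [1] * 500 + [0] * 500          # accuracy collapses halfway: a drift
for i, bit in enumerate(stream):
    adwin.add_element(bit)
    if adwin.detected_change():
        print('change detected around element', i)
        break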
Example no. 8
    def partial_fit(self, X, y=None, classes=None):

        # Get information on the input stream
        r, c = get_dimensions(X)

        for i in range(r):
            # Check if the window is instantiated
            if self.window is None:
                self.window = InstanceWindow(self.window_size)

            # Add an element to the window (1 row)
            self.window.add_element(np.asarray([X[i]]), np.asarray([[y[i]]]))

            # Increment the counter for the n_elements
            self.j += 1

            # Check if the window is full
            if self.j == self.window_size:
                # A new model has to be generated: a fresh tree each time,
                # otherwise every slot in H would hold the same refitted object
                self.h = DecisionTreeClassifier()
                X_batch = self.window.get_attributes_matrix()
                y_batch = self.window.get_targets_matrix()
                self.h.fit(X_batch, y_batch)
                # Keep only self.max_models: pop the oldest to push a new one
                if len(self.H) == self.max_models:
                    self.H.pop(0)
                self.H.append(self.h)
                # Reset the element counter
                self.j = 0

        return self
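Tracing the counter logic (a sketch, assuming this method belongs to the BatchClassifier shown in Example no. 14): with window_size=100, a single call with 250 rows fills the window twice, so two trees are trained and j ends at 50.

clf = BatchClassifier(window_size=100, max_models=100)
X = np.random.rand(250, 3)
y = (X[:, 0] > 0.5).astype(int)
clf.partial_fit(X, y)
print(len(clf.H), clf.j)   # -> 2 50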
Example no. 9
    def fit(self, X, y, classes=None, weight=None):
        """ fit
        
        Fits the model on the samples X and targets y. This is actually the 
        function as the partial fit.
        
        For the K-Nearest Neighbors Classifier, fitting the model is the 
        equivalent of inserting the newer samples in the observed window, 
        and if the size_limit is reached, removing older results. To store 
        the viewed samples we use a InstanceWindow object. For this class' 
        documentation please visit skmultiflow.core.utils.data_structures
        
        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            The data upon which the algorithm will create its model.
            
        y: Array-like
            An array-like containing the classification targets for all 
            samples in X.
            
        classes: Not used.

        weight: Not used.
        
        Returns
        -------
        KNN
            self
        
        """
        r, c = get_dimensions(X)
        if self.window is None:
            self.window = InstanceWindow(max_size=self.max_window_size)

        for i in range(r):
            # Same insertion regardless of batch size
            self.window.add_element(np.asarray([X[i]]),
                                    np.asarray([[y[i]]]))
        return self
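InstanceWindow semantics as used throughout these examples (a sketch based only on the calls shown here): rows go in one at a time as 2-D arrays, the stored matrices come back stacked, and the oldest rows are dropped once max_size is exceeded.

w = InstanceWindow(max_size=3, dtype=float)
for i in range(5):
    w.add_element(np.asarray([[float(i), float(i)]]), np.asarray([[i % 2]]))
print(w.get_attributes_matrix())   # only the last 3 rows survive: 2, 3, 4
print(w.get_targets_matrix())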
Example no. 10
class BatchClassifier:
    def __init__(self, window_size=100, max_models=10):
        self.window = InstanceWindow(max_size=window_size)
        self.H = []
        # Write index into H once the buffer is full
        self.h = 0
        self.max_models = max_models

    def partial_fit(self, X, y=None, classes=None):
        # if not initialized
        if self.H is None:
            self.H = []
        r, c = get_dimensions(X)
        for i in range(r):
            self.window.add_element(np.asarray([X[i]]), np.asarray([[y[i]]]))
        # Build a decision tree on the window's contents and store it,
        # overwriting the oldest slot once max_models trees are kept
        clf = DecisionTreeClassifier()
        clf.fit(self.window.get_attributes_matrix(),
                self.window.get_targets_matrix())
        if len(self.H) < self.max_models:
            self.H.append(clf)
        else:
            self.H[self.h] = clf
            self.h = (self.h + 1) % self.max_models

        # N.B.: The 'classes' option is not important for this classifier
        return self

    def predict(self, X):
        N, D = X.shape
        if not self.H:
            return np.zeros(N, dtype=int)  # no model trained yet
        # Majority class across the ensemble, per sample (assumes
        # non-negative integer labels, as np.bincount requires)
        votes = np.asarray([clf.predict(X) for clf in self.H], dtype=np.int64)
        return np.asarray([np.bincount(votes[:, i]).argmax() for i in range(N)])
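The ring-buffer replacement used in partial_fit, in isolation: once the list is full, the write index wraps with a modulo so the oldest slot is overwritten first.

H, max_models, h = [], 3, 0
for model_id in range(5):          # model_id stands in for a fitted tree
    if len(H) < max_models:
        H.append(model_id)
    else:
        H[h] = model_id
        h = (h + 1) % max_models
print(H)   # -> [3, 4, 2]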
Example no. 11
    def partial_fit(self, X, y=None, classes=None):
        # Update window with new data
        r, c = get_dimensions(X)

        if self.window is None:
            self.window = InstanceWindow(max_size=self.window_size)

        for i in range(r):
            self.window.add_element(np.asarray([X[i]]), np.asarray([[y[i]]]))
            # If window is full, create and train new Decision Tree
            if self.window._num_samples == self.window_size:
                self.h = DecisionTreeClassifier()
                self.h.fit(self.window.get_attributes_matrix(),
                           self.window.get_targets_matrix())
                # Add new Decision Tree to model set
                self._add_to_buffer(self.h)
                # Clear window
                self.window = InstanceWindow(max_size=self.window_size,
                                             dtype=float)
        return self
Example no. 12
# Imports assumed by the KNN/KNNAdwin examples below (a sketch; exact module
# paths depend on the skmultiflow version and follow the references given in
# these docstrings):
import sys
import numpy as np
import sklearn.neighbors as sk
from skmultiflow.classification.base import BaseClassifier
from skmultiflow.classification.core.drift_detection.adwin import ADWIN
from skmultiflow.core.utils.data_structures import InstanceWindow
from skmultiflow.core.utils.utils import get_dimensions

class KNN(BaseClassifier):
    """ K-Nearest Neighbors Classifier
    
    This is a non-parametric classification method. Its output is based 
    on the k closest training examples to the query sample X.
    
    It works by keeping track of a fixed number of training samples, in 
    our case the last max_window_size training samples. Then, whenever 
    a query request is executed, the algorithm searches its stored 
    samples and finds the closest ones using a selected distance metric.
    
    To store the samples, while reducing search times, we use a structure 
    called a KD Tree (a K Dimensional Tree, for k dimensional problems). 
    Although we do have our own KDTree implementation, which accepts 
    custom metrics, we recommend the standard scikit-learn KDTree which, 
    although it doesn't accept custom metrics, is optimized and runs 
    faster.
    
    Parameters
    ----------
    k: int
        The number of nearest neighbors to search for.
        
    max_window_size: int
        The maximum size of the window storing the last viewed samples.
        
    leaf_size: int
        The maximum number of samples that can be stored in one leaf node, 
        which determines from which point the algorithm will switch for a 
        brute-force approach. The bigger this number the faster the tree 
        construction time, but the slower the query time will be.
        
    categorical_list: An array-like
        Each entry is the index of a categorical feature; may be used to 
        request further filtering.
    
    Raises
    ------
    NotImplementedError: A few of the functions described here are not 
    implemented since they have no application in this context.
    
    ValueError: A ValueError is raised if the predict function is called 
    before at least k samples have been analyzed by the algorithm.
    
    Notes
    -----
    For a KDTree functionality explanation, please see our KDTree 
    documentation, under skmultiflow.lazy.neighbors.kdtree.
    
    This classifier is not optimal for a mixture of categorical and 
    numerical features.
    
    If you wish to use our KDTree implementation please refer to this class' 
    function __predict_proba
    
    Examples
    --------
    >>> # Imports
    >>> from skmultiflow.classification.lazy.knn import KNN
    >>> from skmultiflow.data.file_stream import FileStream
    >>> from skmultiflow.options.file_option import FileOption
    >>> # Setting up the stream
    >>> opt = FileOption('FILE', 'OPT_NAME', 'skmultiflow/datasets/sea_big.csv', 'csv', False)
    >>> stream = FileStream(opt, -1, 1)
    >>> stream.prepare_for_use()
    >>> # Pre training the classifier with 200 samples
    >>> X, y = stream.next_instance(200)
    >>> knn = KNN(k=8, max_window_size=2000, leaf_size=40)
    >>> knn.partial_fit(X, y)
    >>> # Preparing the processing of 5000 samples and correct prediction count
    >>> n_samples = 0
    >>> corrects = 0
    >>> while n_samples < 5000:
    ...     X, y = stream.next_instance()
    ...     my_pred = knn.predict(X)
    ...     if y[0] == my_pred[0]:
    ...         corrects += 1
    ...     knn = knn.partial_fit(X, y)
    ...     n_samples += 1
    >>>
    >>> # Displaying results
    >>> print('KNN usage example')
    >>> print(str(n_samples) + ' samples analyzed.')
    5000 samples analyzed.
    >>> print("KNN's performance: " + str(corrects/n_samples))
    KNN's performance: 0.868
    
    """
    def __init__(self,
                 k=5,
                 max_window_size=1000,
                 leaf_size=30,
                 categorical_list=None):
        super().__init__()
        self.k = k
        self.max_window_size = max_window_size
        self.c = 0
        self.window = InstanceWindow(max_size=max_window_size, dtype=float)
        self.first_fit = True
        self.classes = []
        self.leaf_size = leaf_size
        # None instead of a mutable [] default
        self.categorical_list = categorical_list if categorical_list is not None else []

    def fit(self, X, y, classes=None, weight=None):
        """ fit
        
        Fits the model on the samples X and targets y. This is actually the 
        function as the partial fit.
        
        For the K-Nearest Neighbors Classifier, fitting the model is the 
        equivalent of inserting the newer samples in the observed window, 
        and if the size_limit is reached, removing older results. To store 
        the viewed samples we use a InstanceWindow object. For this class' 
        documentation please visit skmultiflow.core.utils.data_structures
        
        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            The data upon which the algorithm will create its model.
            
        y: Array-like
            An array-like containing the classification targets for all 
            samples in X.
            
        classes: Not used.

        weight: Not used.
        
        Returns
        -------
        KNN
            self
        
        """
        r, c = get_dimensions(X)
        if self.window is None:
            self.window = InstanceWindow(max_size=self.max_window_size)

        for i in range(r):
            # Same insertion regardless of batch size
            self.window.add_element(np.asarray([X[i]]),
                                    np.asarray([[y[i]]]))
        return self

    def partial_fit(self, X, y, classes=None, weight=None):
        """ partial_fit
        
        Fits the model on the samples X and targets y.
        
        For the K-Nearest Neighbors Classifier, fitting the model is the 
        equivalent of inserting the newer samples in the observed window, 
        and if the size_limit is reached, removing older results. To store 
        the viewed samples we use an InstanceWindow object; for its 
        documentation please visit skmultiflow.core.utils.data_structures
        
        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            The data upon which the algorithm will create its model.
            
        y: Array-like
            An array-like containing the classification targets for all 
            samples in X.

        classes: Not used.

        weight: Not used.
        
        Returns
        -------
        KNN
            self
        
        """
        r, c = get_dimensions(X)
        if self.window is None:
            self.window = InstanceWindow(max_size=self.max_window_size)

        for i in range(r):
            # Same insertion regardless of batch size
            self.window.add_element(np.asarray([X[i]]),
                                    np.asarray([[y[i]]]))
        return self

    def reset(self):
        self.window = None
        return self

    def predict(self, X):
        """ predict
        
        Predicts the label of the X sample, by searching the KDTree for 
        the k-Nearest Neighbors.
        
        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            All the samples we want to predict the label for.
            
        Returns
        -------
        list
            A list containing the predicted labels for all instances in X.
        
        """
        r, c = get_dimensions(X)
        probs = self.predict_proba(X)
        preds = []
        for i in range(r):
            preds.append(self.classes[probs[i].index(np.max(probs[i]))])
        return preds

    def _predict(self, X):
        raise NotImplementedError

    def predict_proba(self, X):
        """ predict_proba
         
        Calculates the probability of each sample in X belonging to each 
        of the labels, based on the knn algorithm.
        
        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
        
        Raises
        ------
        ValueError: If there is an attempt to call this function before, 
        at least, k samples have been analyzed by the learner, a ValueError 
        is raised.
        
        Returns
        -------
        numpy.ndarray
            An array of shape (n_samples, n_classes), in which each outer entry 
            is associated with the X entry of the same index, and where the list 
            at index [i] contains len(self.classes) elements, each representing 
            the probability that the i-th sample of X belongs to a certain label.
         
        """
        if self.window is None or self.window._num_samples < self.k:
            raise ValueError(
                "KNN should be partially fitted on at least k samples before doing any prediction."
            )
        probs = []
        r, c = get_dimensions(X)

        self.classes = list(set().union(
            self.classes, np.unique(self.window.get_targets_matrix())))

        new_dist, new_ind = self.__predict_proba(X)

        for i in range(r):
            classes = [0 for j in range(len(self.classes))]
            for index in new_ind[i]:
                classes[self.classes.index(
                    self.window.get_targets_matrix()[index])] += 1
            # Normalize by the number of neighbors, not the number of queries
            probs.append([x / len(new_ind[i]) for x in classes])

        return probs

    def __predict_proba(self, X):
        """ __predict_proba
        
        Private implementation of the predict_proba method.
        
        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
        
        Returns
        -------
        tuple list
            One list with the k-nearest neighbor's distances and another 
            one with their indexes.
        
        Notes
        -----
        If you wish to use our own KDTree implementation please comment 
        the third line of this function and uncomment the first and 
        second lines.
        
        """
        #tree = KDTree(self.window.get_attributes_matrix(), metric='euclidean',
        #              categorical_list=self.categorical_list, return_distance=True)

        tree = sk.KDTree(self.window.get_attributes_matrix(),
                         self.leaf_size,
                         metric='euclidean')
        dist, ind = tree.query(np.asarray(X), k=self.k)
        return dist, ind

    def score(self, X, y):
        raise NotImplementedError

    def get_info(self):
        return 'KNN Classifier: max_window_size: ' + str(self.max_window_size) + \
            ' - leaf_size: ' + str(self.leaf_size)
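The scikit-learn KDTree query that __predict_proba wraps, in isolation: dist and ind come back with shape (n_queries, k), which is why predict_proba normalizes vote counts by len(new_ind[i]) (the number of neighbors) rather than len(new_ind) (the number of queries).

import numpy as np
from sklearn.neighbors import KDTree

data = np.random.rand(50, 3)
tree = KDTree(data, leaf_size=30, metric='euclidean')
dist, ind = tree.query(data[:2], k=5)
print(dist.shape, ind.shape)   # -> (2, 5) (2, 5)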
Example no. 13
class KNNAdwin(KNN):
    """ K-Nearest Neighbors Classifier with ADWIN Change detector 
    
    This classifier is an improvement over the regular KNN classifier, 
    as it is resistant to concept drift. It utilises the ADWIN change 
    detector to decide which samples to keep and which ones to forget, 
    and by doing so it regulates the sample window size.
     
    To know more about the ADWIN change detector, please visit 
    skmultiflow.classification.core.drift_detection.adwin

    It uses the regular KNN Classifier as a base class, with the major 
    differences that this class keeps a variable-size window instead of 
    a fixed-size one, and that it updates the ADWIN algorithm at each 
    partial_fit call.
    
    Parameters
    ----------
    k: int
        The number of nearest neighbors to search for.
        
    max_window_size: int
        The maximum size of the window storing the last viewed samples.
        
    leaf_size: int
        The maximum number of samples that can be stored in one leaf node, 
        which determines from which point the algorithm will switch for a 
        brute-force approach. The bigger this number the faster the tree 
        construction time, but the slower the query time will be.
        
    categorical_list: An array-like
        Each entry is the index of a categorical feature; may be used to 
        request further filtering.
        
    Raises
    ------
    NotImplementedError: A few of the functions described here are not 
    implemented since they have no application in this context.
    
    ValueError: A ValueError is raised if the predict function is called 
    before at least k samples have been analyzed by the algorithm.
    
    Examples
    --------
    >>> # Imports
    >>> from skmultiflow.classification.lazy.knn_adwin import KNNAdwin
    >>> from skmultiflow.classification.lazy.knn import KNN
    >>> from skmultiflow.data.file_stream import FileStream
    >>> # Setting up the stream
    >>> stream = FileStream('skmultiflow/datasets/covtype.csv', -1, 1)
    >>> stream.prepare_for_use()
    >>> # Setting up the KNNAdwin classifier
    >>> knn_adwin = KNNAdwin(k=8, leaf_size=40, max_window_size=2000)
    >>> # Pre training the classifier with 200 samples
    >>> X, y = stream.next_sample(200)
    >>> knn_adwin = knn_adwin.partial_fit(X, y)
    >>> # Keeping track of sample count and correct prediction count
    >>> n_samples = 0
    >>> corrects = 0
    >>> while n_samples < 5000:
    ...     X, y = stream.next_sample()
    ...     pred = knn_adwin.predict(X)
    ...     if y[0] == pred[0]:
    ...         corrects += 1
    ...     knn_adwin = knn_adwin.partial_fit(X, y)
    ...     n_samples += 1
    >>>
    >>> # Displaying the results
    >>> print('KNNAdwin usage example')
    >>> print(str(n_samples) + ' samples analyzed.')
    5000 samples analyzed.
    >>> print("KNNAdwin's performance: " + str(corrects/n_samples))
    KNNAdwin's performance: 0.7798

    """
    def __init__(self,
                 k=5,
                 max_window_size=sys.maxsize,
                 leaf_size=30,
                 categorical_list=None):
        super().__init__(k=k,
                         max_window_size=max_window_size,
                         leaf_size=leaf_size,
                         categorical_list=categorical_list)
        self.adwin = ADWIN()
        self.window = None

    def reset(self):
        """ reset
        
        Resets the adwin algorithm as well as the base model 
        kept by the KNN base class.
        
        Returns
        -------
        KNNAdwin
            self
        
        """
        self.adwin = ADWIN()
        return super().reset()

    def partial_fit(self, X, y, classes=None, weight=None):
        """ partial_fit
        
        Partially fits the model. This is done by updating the window 
        with new samples while also updating the adwin algorithm. Then 
        we verify if a change was detected, and if so, the window is 
        correctly split at the drift moment.
        
        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            The data upon which the algorithm will create its model.
            
        y: Array-like
            An array-like containing the classification targets for all 
            samples in X.
            
        classes: Not used.

        weight: Not used.
        
        Returns
        -------
        KNNAdwin
            self
        
        """
        r, c = get_dimensions(X)
        if self.window is None:
            self.window = InstanceWindow(max_size=self.max_window_size)

        for i in range(r):
            # Same insertion regardless of batch size
            self.window.add_element(np.asarray([X[i]]),
                                    np.asarray([[y[i]]]))
            if self.window._num_samples >= self.k:
                add = 1 if self.predict(np.asarray([X[i]])) == y[i] else 0
                self.adwin.add_element(add)
            else:
                self.adwin.add_element(0)

        if self.window._num_samples >= self.k:
            changed = self.adwin.detected_change()

            if changed:
                if self.adwin._width < self.window._num_samples:
                    for i in range(self.window._num_samples, self.adwin._width,
                                   -1):
                        self.window.delete_element()
        return self
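The shrink step after a detected change, worked through: the window is cut back to ADWIN's surviving width by deleting num_samples - width of the oldest elements, one delete_element() call per step of the countdown.

num_samples, adwin_width = 120, 80
deletions = len(range(num_samples, adwin_width, -1))
print(deletions)   # -> 40 oldest samples are dropped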
Example no. 14
class BatchClassifier:

    def __init__(self, window_size=100, max_models=100):
        self.H = []
        self.h = None
        self.window_size = window_size
        self.max_models = max_models
        self.window = InstanceWindow(window_size)
        # Counter of elements added since the last model was trained
        self.j = 0

    def partial_fit(self, X, y=None, classes=None):

        # Get information on the input stream
        r, c = get_dimensions(X)

        for i in range(r):
            # Check if the window is instantiated
            if self.window is None:
                self.window = InstanceWindow(self.window_size)

            # Add an element to the window (1 row)
            self.window.add_element(np.asarray([X[i]]), np.asarray([[y[i]]]))

            # Increment the counter for the n_elements
            self.j += 1

            # Check if the window is full
            if self.j == self.window_size:
                # A new model has to be generated: a fresh tree each time,
                # otherwise every slot in H would hold the same refitted object
                self.h = DecisionTreeClassifier()
                X_batch = self.window.get_attributes_matrix()
                y_batch = self.window.get_targets_matrix()
                self.h.fit(X_batch, y_batch)
                # Keep only self.max_models: pop the oldest to push a new one
                if len(self.H) == self.max_models:
                    self.H.pop(0)
                self.H.append(self.h)
                # Reset the element counter
                self.j = 0

        return self

    def predict(self, X):
        N, D = X.shape
        if not self.H:
            return np.zeros(N, dtype=int)  # no model trained yet

        # Majority class across the ensemble, per sample (assumes
        # non-negative integer labels, as np.bincount requires)
        votes = np.asarray([h.predict(X) for h in self.H], dtype=np.int64)
        return np.asarray([np.bincount(votes[:, i]).argmax() for i in range(N)])