Exemplo n.º 1
0
    def partial_fit(self, X, y, classes=None):
        """Age the per-class classifier pools and extend them with this chunk.

        Every stored weight is decremented by one; entries whose weight was
        already ``<= 0`` are dropped together with their classifier.  The
        chunk is then split into minority and majority samples, each part is
        clustered by ``self._best_number_of_clusters`` and one clone of
        ``self.base_classifier`` is fitted per cluster and stored with an
        initial weight of ``self.number_of_classifiers``.

        Parameters
        ----------
        X : indexable collection of samples (``len(X[0])`` is stored as the
            number of features).
        y : labels for ``X`` in their original (un-encoded) form.
        classes : optional sequence of all class labels.  When one of its
            first two entries equals ``"positive"`` that class is used as the
            minority class.
        """
        # First call: build the label encoder from ``classes`` when given,
        # otherwise from the labels present in this chunk.
        if self.classes is None:
            self.label_encoder = LabelEncoder()
            if classes is None:
                self.label_encoder.fit(y)
                self.classes = self.label_encoder.classes_
            else:
                self.label_encoder.fit(classes)
                self.classes = classes

        names_unset = self.minority_name is None or self.majority_name is None

        # Compare by value (``is`` on a str literal tests identity), tolerate
        # ``classes=None`` and pass ``transform`` a sequence, not a scalar.
        if classes is not None and len(classes) >= 2:
            positive_first = classes[0] == "positive"
            if positive_first or classes[1] == "positive":
                encoded = self.label_encoder.transform(list(classes[:2]))
                minority_idx = 0 if positive_first else 1
                self.minority_name = encoded[minority_idx]
                self.majority_name = encoded[1 - minority_idx]

        y = self.label_encoder.transform(y)

        if self.minority_name is None or self.majority_name is None:
            self.minority_name, self.majority_name = minority_majority_name(y)
        if names_unset:
            # Recorded once, on the call that first resolves the class names.
            self.number_of_features = len(X[0])

        # Prune both pools: remember expired entries, age everything, then
        # delete from the back so the remaining indices stay valid.
        for weights, classifiers in (
            (self.weights_array_min, self.classifier_array_min),
            (self.weights_array_maj, self.classifier_array_maj),
        ):
            expired = [i for i, w in enumerate(weights) if w <= 0]
            for i in range(len(weights)):
                weights[i] -= 1
            for i in reversed(expired):
                del weights[i]
                del classifiers[i]

        # Split data
        minority, majority = minority_majority_split(
            X, y, self.minority_name, self.majority_name)

        # One classifier per cluster, fitted on samples only (no labels).
        for data, classifiers, weights in (
            (minority, self.classifier_array_min, self.weights_array_min),
            (majority, self.classifier_array_maj, self.weights_array_maj),
        ):
            samples, n_of_clust = self._best_number_of_clusters(data, 10)
            for i in range(n_of_clust):
                classifiers.append(
                    clone(self.base_classifier).fit(samples[i]))
                weights.append(self.number_of_classifiers)
Exemplo n.º 2
0
    def partial_fit(self, X, y, classes=None):
        """Add a sub-ensemble for this chunk and recompute the weights.

        A new sub-ensemble built by ``self._new_sub_ensemble`` is appended to
        ``self.sub_ensemble_array``; ``self.classifier_weights`` is then
        replaced by ``log(1 / b)`` for every value ``b`` returned by
        ``self._calculate_weights`` and ``self.iterator`` is incremented.

        Parameters
        ----------
        X : samples of the incoming chunk.
        y : labels for ``X`` in their original (un-encoded) form.
        classes : optional sequence of all class labels, used to fit the
            label encoder on the first call.
        """
        warnings.filterwarnings(action='ignore', category=DeprecationWarning)

        # First call: build the label encoder.  The fitted attribute of
        # LabelEncoder is ``classes_`` (there is no ``classes``).
        if self.classes is None:
            self.label_encoder = LabelEncoder()
            if classes is None:
                self.label_encoder.fit(y)
                self.classes = self.label_encoder.classes_
            else:
                self.label_encoder.fit(classes)
                self.classes = classes

        y = self.label_encoder.transform(y)

        if self.minority_name is None or self.majority_name is None:
            self.minority_name, self.majority_name = minority_majority_name(y)

        self.sub_ensemble_array += [self._new_sub_ensemble(X, y)]

        beta_mean = self._calculate_weights(X, y)

        # NOTE(review): a value of exactly 0 in ``beta_mean`` makes ``1 / b``
        # fail (or overflow to inf for numpy floats) - confirm the helper
        # never returns 0.
        self.classifier_weights = [math.log(1 / b) for b in beta_mean]

        self.iterator += 1
Exemplo n.º 3
0
    def partial_fit(self, X, y, classes=None):
        """Train a classifier on the resampled chunk and update the ensemble.

        While the ensemble holds fewer than ``self.number_of_classifiers``
        members the new classifier is appended with weight 1.  Afterwards
        every member is scored by ROC AUC on the resampled chunk, the new
        classifier replaces the lowest-scoring member if it scores higher,
        and the AUC values become the classifier weights.

        Returns early, without changing the ensemble, when
        ``self._resample`` yields ``None`` for the samples.

        Parameters
        ----------
        X : samples of the incoming chunk.
        y : labels for ``X`` in their original (un-encoded) form.
        classes : optional sequence of all class labels.  When one of its
            first two entries equals ``"positive"`` that class is used as the
            minority class.
        """
        import copy

        warnings.filterwarnings(action='ignore', category=DeprecationWarning)

        # First call: build the label encoder.  The fitted attribute of
        # LabelEncoder is ``classes_`` (there is no ``classes``).
        if self.classes is None:
            self.label_encoder = LabelEncoder()
            if classes is None:
                self.label_encoder.fit(y)
                self.classes = self.label_encoder.classes_
            else:
                self.label_encoder.fit(classes)
                self.classes = classes

        # Compare by value (``is`` on a str literal tests identity), tolerate
        # ``classes=None`` and pass ``transform`` a sequence, not a scalar.
        if classes is not None and len(classes) >= 2:
            positive_first = classes[0] == "positive"
            if positive_first or classes[1] == "positive":
                encoded = self.label_encoder.transform(list(classes[:2]))
                minority_idx = 0 if positive_first else 1
                self.minority_name = encoded[minority_idx]
                self.majority_name = encoded[1 - minority_idx]

        y = self.label_encoder.transform(y)

        if self.minority_name is None or self.majority_name is None:
            self.minority_name, self.majority_name = minority_majority_name(y)

        res_X, res_y = self._resample(X, y)
        if res_X is None:
            return

        # Fit a fresh copy: fitting ``self.base_classifier`` itself returns
        # the same object every time, so all members would alias one model.
        new_classifier = copy.deepcopy(self.base_classifier).fit(res_X, res_y)

        if len(self.classifier_array) < self.number_of_classifiers:
            self.classifier_array.append(new_classifier)
            self.classifier_weights.append(1)
            return

        def chunk_auc(clf):
            # Scores are taken from the first probability column.
            y_score = clf.predict_proba(res_X)
            fpr, tpr, _ = metrics.roc_curve(res_y, y_score[:, 0])
            return metrics.auc(fpr, tpr)

        auc_array = [chunk_auc(clf) for clf in self.classifier_array]
        j = np.argmin(auc_array)
        new_auc = chunk_auc(new_classifier)

        if new_auc > auc_array[j]:
            self.classifier_array[j] = new_classifier
            auc_array[j] = new_auc

        for i, auc_value in enumerate(auc_array):
            self.classifier_weights[i] = auc_value
Exemplo n.º 4
0
    def partial_fit(self, X, y, classes=None):
        """Add a classifier trained on the resampled chunk and re-weight all.

        A clone of ``self.base_classifier`` is fitted on the output of
        ``self._resample`` and appended.  Once the ensemble reaches
        ``self.number_of_classifiers`` members, the member at the position
        of the smallest previous weight is removed.  Each remaining member
        then gets the weight ``log(1 / mse)`` where ``mse`` is the mean of
        ``(1 - p_true)^2`` over the chunk and ``p_true`` is the predicted
        probability of the true class (0 when the classifier has no column
        for that class).

        Parameters
        ----------
        X : samples of the incoming chunk.
        y : labels for ``X`` in their original (un-encoded) form.
        classes : optional sequence of all class labels.  When one of its
            first two entries equals ``"positive"`` that class is used as the
            minority class.
        """
        # First call: build the label encoder.  The fitted attribute of
        # LabelEncoder is ``classes_`` (there is no ``classes``).
        if self.classes is None:
            self.label_encoder = LabelEncoder()
            if classes is None:
                self.label_encoder.fit(y)
                self.classes = self.label_encoder.classes_
            else:
                self.label_encoder.fit(classes)
                self.classes = classes

        # Compare by value (``is`` on a str literal tests identity), tolerate
        # ``classes=None`` and pass ``transform`` a sequence, not a scalar.
        if classes is not None and len(classes) >= 2:
            positive_first = classes[0] == "positive"
            if positive_first or classes[1] == "positive":
                encoded = self.label_encoder.transform(list(classes[:2]))
                minority_idx = 0 if positive_first else 1
                self.minority_name = encoded[minority_idx]
                self.majority_name = encoded[1 - minority_idx]

        y = self.label_encoder.transform(y)

        if self.minority_name is None or self.majority_name is None:
            self.minority_name, self.majority_name = minority_majority_name(y)

        res_X, res_y = self._resample(X, y)
        self.classifier_array.append(
            clone(self.base_classifier).fit(res_X, res_y))

        # The weights still describe the ensemble before the append, so the
        # freshly added classifier is never the one removed here.
        if len(self.classifier_array) >= self.number_of_classifiers:
            worst = np.argmin(self.classifier_weights)
            del self.classifier_array[worst]
            del self.classifier_weights[worst]

        n_samples = len(X)
        weights = []
        for clf in self.classifier_array:
            proba = clf.predict_proba(X)
            squared_error = 0
            for i in range(n_samples):
                try:
                    true_class_proba = proba[i][y[i]]
                except IndexError:
                    # Classifier was trained without this class.
                    true_class_proba = 0
                squared_error += math.pow(1 - true_class_proba, 2)
            if squared_error == 0:
                # Avoid log(1 / 0) for a perfect classifier.
                squared_error = 0.00001
            weights.append(math.log(1 / (squared_error / n_samples)))

        self.classifier_weights = weights
Exemplo n.º 5
0
    def partial_fit(self, X, y, classes=None):
        """Add a classifier trained on the resampled chunk and re-weight all.

        A clone of ``self.base_classifier`` is fitted on the output of
        ``self._resample`` and appended (the ensemble is never pruned here).
        Every member then gets the weight ``log(1 / mse)`` where ``mse`` is
        the mean of ``(1 - p_true)^2`` over the chunk and ``p_true`` is the
        predicted probability of the true class.

        Parameters
        ----------
        X : samples of the incoming chunk.
        y : labels for ``X`` in their original (un-encoded) form.
        classes : optional sequence of all class labels, used to fit the
            label encoder on the first call.
        """
        warnings.filterwarnings(action='ignore', category=DeprecationWarning)

        # First call: build the label encoder.  The result must be stored in
        # ``self.classes`` (the attribute this guard tests); storing it only
        # in ``self.classes_`` made the encoder refit on every chunk.
        if self.classes is None:
            self.label_encoder = LabelEncoder()
            if classes is None:
                self.label_encoder.fit(y)
                self.classes = self.label_encoder.classes_
                # Kept for any code that reads the old attribute name.
                self.classes_ = self.classes
            else:
                self.label_encoder.fit(classes)
                self.classes = classes

        y = self.label_encoder.transform(y)

        if self.minority_name is None or self.majority_name is None:
            self.minority_name, self.majority_name = minority_majority_name(y)

        res_X, res_y = self._resample(X, y)
        self.classifier_array.append(
            clone(self.base_classifier).fit(res_X, res_y))

        n_samples = len(X)
        weights = []
        for clf in self.classifier_array:
            proba = clf.predict_proba(X)
            squared_error = 0
            for i in range(n_samples):
                squared_error += math.pow(1 - proba[i][y[i]], 2)
            if squared_error == 0:
                # Avoid log(1 / 0) for a perfect classifier.
                squared_error = 0.00001
            weights.append(math.log(1 / (squared_error / n_samples)))

        self.classifier_weights = weights
Exemplo n.º 6
0
    def partial_fit(self, X, y, classes=None):
        """Rebuild the ensemble from splits of the chunk's majority class.

        The majority samples of the chunk are divided into
        ``self.number_of_classifiers`` parts; each part is combined with the
        minority samples returned by ``self._resample`` and used to fit one
        clone of ``self.base_classifier``.  The previous ensemble is
        discarded.  Returns early, leaving the ensemble and
        ``self.time_stamp`` untouched, when the chunk has no majority
        samples.

        Parameters
        ----------
        X : samples of the incoming chunk.
        y : labels for ``X`` in their original (un-encoded) form.
        classes : optional sequence of all class labels.  When one of its
            first two entries equals ``"positive"`` that class is used as the
            minority class.
        """
        # First call: build the label encoder.  The fitted attribute of
        # LabelEncoder is ``classes_`` (there is no ``classes``).
        if self.classes is None:
            self.label_encoder = LabelEncoder()
            if classes is None:
                self.label_encoder.fit(y)
                self.classes = self.label_encoder.classes_
            else:
                self.label_encoder.fit(classes)
                self.classes = classes

        # Compare by value (``is`` on a str literal tests identity), tolerate
        # ``classes=None`` and pass ``transform`` a sequence, not a scalar.
        if classes is not None and len(classes) >= 2:
            positive_first = classes[0] == "positive"
            if positive_first or classes[1] == "positive":
                encoded = self.label_encoder.transform(list(classes[:2]))
                minority_idx = 0 if positive_first else 1
                self.minority_name = encoded[minority_idx]
                self.majority_name = encoded[1 - minority_idx]

        y = self.label_encoder.transform(y)

        if self.minority_name is None or self.majority_name is None:
            self.minority_name, self.majority_name = minority_majority_name(y)

        new_minority = self._resample(X, y)
        minority, majority = minority_majority_split(X, y, self.minority_name,
                                                     self.majority_name)

        # Test for emptiness; ``.any()`` would also be false for a non-empty
        # array whose feature values are all zero.
        if len(majority) == 0:
            return

        self.classifier_array = []
        for m_s in np.array_split(majority, self.number_of_classifiers):
            res_X = np.concatenate((m_s, new_minority), axis=0)
            res_y = (len(m_s) * [self.majority_name]
                     + len(new_minority) * [self.minority_name])
            self.classifier_array.append(
                clone(self.base_classifier).fit(res_X, res_y))

        self.time_stamp += 1
Exemplo n.º 7
0
    def partial_fit(self, X, y, classes=None):
        """Rebuild the ensemble from splits of the chunk's majority class.

        The majority samples of the chunk are divided into
        ``self.number_of_classifiers`` parts; each part is combined with the
        minority samples returned by ``self._resample`` and used to fit one
        independent copy of ``self.base_classifier``.  The previous ensemble
        is discarded.  Prints a message and returns early, leaving the
        ensemble untouched, when the chunk has no majority samples.

        Parameters
        ----------
        X : samples of the incoming chunk.
        y : labels for ``X`` in their original (un-encoded) form.
        classes : optional sequence of all class labels.  When one of its
            first two entries equals ``"positive"`` that class is used as the
            minority class.
        """
        import copy

        warnings.filterwarnings(action='ignore', category=DeprecationWarning)

        # First call: build the label encoder.  The fitted attribute of
        # LabelEncoder is ``classes_`` (there is no ``classes``).
        if self.classes is None:
            self.label_encoder = LabelEncoder()
            if classes is None:
                self.label_encoder.fit(y)
                self.classes = self.label_encoder.classes_
            else:
                self.label_encoder.fit(classes)
                self.classes = classes

        # Compare by value (``is`` on a str literal tests identity), tolerate
        # ``classes=None`` and pass ``transform`` a sequence, not a scalar.
        if classes is not None and len(classes) >= 2:
            positive_first = classes[0] == "positive"
            if positive_first or classes[1] == "positive":
                encoded = self.label_encoder.transform(list(classes[:2]))
                minority_idx = 0 if positive_first else 1
                self.minority_name = encoded[minority_idx]
                self.majority_name = encoded[1 - minority_idx]

        y = self.label_encoder.transform(y)

        if self.minority_name is None or self.majority_name is None:
            self.minority_name, self.majority_name = minority_majority_name(y)

        new_minority = self._resample(X, y)
        minority, majority = minority_majority_split(
            X, y, self.minority_name, self.majority_name)

        # Test for emptiness; ``.any()`` would also be false for a non-empty
        # array whose feature values are all zero.
        if len(majority) == 0:
            print("majoirty empty")
            return

        self.classifier_array = []
        for m_s in np.array_split(majority, self.number_of_classifiers):
            res_X = np.concatenate((m_s, new_minority), axis=0)
            res_y = (len(m_s) * [self.majority_name]
                     + len(new_minority) * [self.minority_name])
            # Fit a fresh copy per split: refitting ``self.base_classifier``
            # itself would leave every slot pointing at one and the same
            # model, trained on the last split only.
            self.classifier_array.append(
                copy.deepcopy(self.base_classifier).fit(res_X, res_y))
Exemplo n.º 8
0
    def partial_fit(self, X, y, classes=None):
        """Add a classifier for this chunk and recompute the voting weights.

        On the first chunk a clone of ``self.base_classifier`` is fitted on
        the resampled data and stored with weight 1.  On later chunks:

        1. the current ensemble is evaluated on the chunk and a normalised
           instance distribution ``D`` is built in which correctly
           classified instances are scaled by the ensemble error;
        2. a new classifier is fitted on the resampled chunk and appended;
        3. each member's ``D``-weighted error is computed; errors above 0.5
           retrain the newest member or are clamped to 0.5 for older ones;
        4. the errors are turned into ``beta = e / (1 - e)``, averaged with
           sigmoid weights (``self.param_a``, ``self.param_b``) and stored as
           ``log(1 / beta_mean)`` in ``self.classifier_weights``;
        5. once the ensemble has ``self.number_of_classifiers`` members the
           one with the largest averaged beta is removed.

        The scheme appears to follow Learn++.NSE - TODO confirm.

        Parameters
        ----------
        X : samples of the incoming chunk.
        y : labels for ``X`` in their original (un-encoded) form.
        classes : optional sequence of all class labels, used to fit the
            label encoder on the first call.
        """
        warnings.filterwarnings(action='ignore', category=DeprecationWarning)

        # First call: build the label encoder.  The fitted attribute of
        # LabelEncoder is ``classes_`` (there is no ``classes``).
        if self.classes is None:
            self.label_encoder = LabelEncoder()
            if classes is None:
                self.label_encoder.fit(y)
                self.classes = self.label_encoder.classes_
            else:
                self.label_encoder.fit(classes)
                self.classes = classes

        y = self.label_encoder.transform(y)

        if self.minority_name is None or self.majority_name is None:
            self.minority_name, self.majority_name = minority_majority_name(y)

        self.number_of_instances = len(y)

        if not self.classifier_array:
            res_X, res_y = self._resample(X, y)
            self.classifier_array.append(
                clone(self.base_classifier).fit(res_X, res_y))
            self.classifier_weights = [1]
            return

        # Instance distribution from the current ensemble's mistakes.
        y_pred = self.label_encoder.transform(self.predict(X))
        E = 1 - metrics.accuracy_score(y, y_pred)
        n = float(self.number_of_instances)
        w = np.where(np.equal(y, y_pred), E / n, 1 / n)
        w_sum = np.sum(w)
        # A perfect ensemble gives all-zero weights; fall back to a uniform
        # distribution instead of dividing by zero (NaN everywhere).
        D = w / w_sum if w_sum > 0 else np.full(w.shape, 1 / n)

        res_X, res_y = self._resample(X, y)
        self.classifier_array.append(
            clone(self.base_classifier).fit(res_X, res_y))

        newest = len(self.classifier_array) - 1
        epsilon_sum_array = []
        for j, clf in enumerate(self.classifier_array):
            epsilon_sum = np.sum(np.not_equal(y, clf.predict(X)) * D)
            if epsilon_sum > 0.5:
                if j == newest:
                    self.classifier_array[j] = clone(
                        self.base_classifier).fit(res_X, res_y)
                else:
                    epsilon_sum = 0.5
            # Store only after clamping so the 0.5 cap actually takes effect.
            epsilon_sum_array.append(epsilon_sum)

        epsilon_sum_array = np.array(epsilon_sum_array)
        beta = epsilon_sum_array / (1 - epsilon_sum_array)

        a = self.param_a
        b = self.param_b
        t = len(self.classifier_array)
        sigma = 1 / (1 + np.exp(-a * (t - np.arange(t) - b)))
        sigma_mean = np.array(
            [sigma[k] / np.sum(sigma[:t - k]) for k in range(t)])
        beta_mean = [
            np.sum(sigma_mean[:t - k] * beta[:t - k]) for k in range(t)
        ]

        self.classifier_weights = [math.log(1 / bm) for bm in beta_mean]

        if t >= self.number_of_classifiers:
            ind = np.argmax(beta_mean)
            del self.classifier_array[ind]
            del self.classifier_weights[ind]
Exemplo n.º 9
0
    def partial_fit(self, X, y, classes=None):
        """Retrain the classifier on this chunk plus stored undersampled data.

        The training set is the concatenation of all stored chunks and the
        raw incoming chunk.  The incoming chunk is then undersampled with
        ``self.undersampling`` and stored, keeping at most
        ``self.number_of_chunks`` stored chunks.  When the minority/majority
        ratio of the training set is below ``self.balance_ratio`` it is
        oversampled with ``self.oversampling`` before
        ``self.base_classifier`` is fitted and kept as ``self.clf``.

        Parameters
        ----------
        X : indexable collection of samples (``len(X[0])`` is stored as the
            number of features).
        y : labels for ``X`` in their original (un-encoded) form.
        classes : optional sequence of all class labels.  When one of its
            first two entries equals ``"positive"`` that class is used as the
            minority class.

        Raises
        ------
        ZeroDivisionError
            If the training set contains no majority-class samples.
        """
        # ________________________________________
        # Initial preparation

        # First call: build the label encoder.  The fitted attribute of
        # LabelEncoder is ``classes_`` (there is no ``classes``).
        if self.classes is None:
            self.label_encoder = LabelEncoder()
            if classes is None:
                self.label_encoder.fit(y)
                self.classes = self.label_encoder.classes_
            else:
                self.label_encoder.fit(classes)
                self.classes = classes

        names_unset = self.minority_name is None or self.majority_name is None

        # Compare by value (``is`` on a str literal tests identity), tolerate
        # ``classes=None`` and pass ``transform`` a sequence, not a scalar.
        if classes is not None and len(classes) >= 2:
            positive_first = classes[0] == "positive"
            if positive_first or classes[1] == "positive":
                encoded = self.label_encoder.transform(list(classes[:2]))
                minority_idx = 0 if positive_first else 1
                self.minority_name = encoded[minority_idx]
                self.majority_name = encoded[1 - minority_idx]

        y = self.label_encoder.transform(y)

        if self.minority_name is None or self.majority_name is None:
            self.minority_name, self.majority_name = minority_majority_name(y)
        if names_unset:
            # Recorded once, on the call that first resolves the class names.
            self.number_of_features = len(X[0])

        # ________________________________________
        # Get stored data

        new_X, new_y = [], []
        for tmp_X, tmp_y in zip(self.stored_X, self.stored_y):
            new_X.extend(tmp_X)
            new_y.extend(tmp_y)
        new_X.extend(X)
        new_y.extend(y)

        new_X = np.array(new_X)
        new_y = np.array(new_y)

        # ________________________________________
        # Undersample and store new data

        und_X, und_y = self.undersampling.fit_resample(X, y)
        self.stored_X.append(und_X)
        self.stored_y.append(und_y)

        if len(self.stored_X) > self.number_of_chunks:
            # Drop the oldest stored chunk.
            del self.stored_X[0]
            del self.stored_y[0]

        # ________________________________________
        # Oversample when below ratio

        minority, majority = minority_majority_split(
            new_X, new_y, self.minority_name, self.majority_name)
        ratio = len(minority) / len(majority)

        if ratio < self.balance_ratio:
            new_X, new_y = self.oversampling.fit_resample(new_X, new_y)

        # ________________________________________
        # Train classifier

        self.clf = self.base_classifier.fit(new_X, new_y)
Exemplo n.º 10
0
    def partial_fit(self, X, y, classes=None):
        """Add a classifier for this chunk and recompute the voting weights.

        On the first chunk an independent copy of ``self.base_classifier``
        is fitted on the resampled data and stored with weight 1.  On later
        chunks:

        1. the current ensemble is evaluated on the chunk and a normalised
           instance distribution ``D`` is built in which correctly
           classified instances are scaled by the ensemble error;
        2. a new classifier is fitted on the resampled chunk and appended;
        3. each member's ``D``-weighted error is computed; errors above 0.5
           retrain the newest member or are clamped to 0.5 for older ones,
           and ``beta = e / (1 - e)`` is recorded per member;
        4. the betas are averaged with sigmoid weights (``self.param_a``,
           ``self.param_b``) over ``self.iterator`` steps and stored as
           ``log(1 / beta_mean)`` in ``self.classifier_weights``.

        ``self.iterator`` is incremented on every call.  The scheme appears
        to follow Learn++.NSE - TODO confirm.

        Parameters
        ----------
        X : samples of the incoming chunk.
        y : labels for ``X`` in their original (un-encoded) form.
        classes : optional sequence of all class labels, used to fit the
            label encoder on the first call.
        """
        import copy

        warnings.filterwarnings(action='ignore', category=DeprecationWarning)

        # First call: build the label encoder.  The fitted attribute of
        # LabelEncoder is ``classes_`` (there is no ``classes``).
        if self.classes is None:
            self.label_encoder = LabelEncoder()
            if classes is None:
                self.label_encoder.fit(y)
                self.classes = self.label_encoder.classes_
            else:
                self.label_encoder.fit(classes)
                self.classes = classes

        y = self.label_encoder.transform(y)

        if self.minority_name is None or self.majority_name is None:
            self.minority_name, self.majority_name = minority_majority_name(y)

        if not self.classifier_array:
            res_X, res_y = self._resample(X, y)
            self.classifier_array.append(
                copy.deepcopy(self.base_classifier).fit(res_X, res_y))
            self.classifier_weights = [1]
            self.iterator += 1
            return

        # NOTE(review): ``y`` is label-encoded here; confirm that
        # ``self.predict`` returns labels in the same encoding.
        y_pred = self.predict(X)
        # Ensemble error rate (not accuracy) scales the correct instances.
        E = 1 - metrics.accuracy_score(y, y_pred)
        n = float(len(y))
        # Labels are compared by value; ``is`` on array elements compares
        # object identity and is practically never true.
        w = [E / n if yt == yp else 1 / n for yt, yp in zip(y, y_pred)]
        w_sum = np.sum(w)
        # A perfect ensemble gives all-zero weights; fall back to a uniform
        # distribution instead of dividing by zero.
        D = [wi / w_sum for wi in w] if w_sum > 0 else [1 / n] * len(w)

        res_X, res_y = self._resample(X, y)
        # Fit a fresh copy: fitting ``self.base_classifier`` itself returns
        # the same object every time, so all members would alias one model.
        self.classifier_array.append(
            copy.deepcopy(self.base_classifier).fit(res_X, res_y))

        newest = len(self.classifier_array) - 1
        beta = []
        for j, clf in enumerate(self.classifier_array):
            y_pred = clf.predict(X)
            # Weighted error of this classifier only (computed afresh for
            # every member rather than accumulated across members).
            epsilon_sum = np.sum(
                [d for yt, yp, d in zip(y, y_pred, D) if yt != yp])
            if epsilon_sum > 0.5:
                if j == newest:
                    self.classifier_array[j] = copy.deepcopy(
                        self.base_classifier).fit(res_X, res_y)
                else:
                    epsilon_sum = 0.5
            beta.append(epsilon_sum / float(1 - epsilon_sum))

        a = self.param_a
        b = self.param_b
        t = self.iterator
        sigma = [1 / (1 + math.exp(-a * (t - k - b))) for k in range(t)]
        sigma_mean = [sigma[k] / sum(sigma[:t - k]) for k in range(t)]
        beta_mean = [
            sum(sigma_mean[j] * beta[j] for j in range(t - k))
            for k in range(t)
        ]

        self.classifier_weights = [math.log(1 / bm) for bm in beta_mean]

        self.iterator += 1
Exemplo n.º 11
0
    def partial_fit(self, X, y, classes=None):
        """Train a new ensemble member, with a drift check on the chunk.

        If a drift detector exists, its predictions on the chunk are scored
        with ``geometric_mean_score``; when the score falls below 70% of the
        mean of the previous scores, the detector, the score history, the
        ensemble and the stored chunks are all reset, otherwise the score is
        added to the history.  A clone of ``self.base_classifier`` is then
        fitted on the stored chunks plus the raw incoming chunk (oversampled
        when the minority/majority ratio is below ``self.balance_ratio``),
        the undersampled incoming chunk is stored, the oldest member and
        stored chunk are dropped once the ensemble reaches
        ``self.number_of_classifiers`` members, and the drift detector is
        updated with the training data.

        Parameters
        ----------
        X : indexable collection of samples (``len(X[0])`` is stored as the
            number of features).
        y : labels for ``X`` in their original (un-encoded) form.
        classes : optional sequence of all class labels.  When one of its
            first two entries equals ``"positive"`` that class is used as the
            minority class.

        Raises
        ------
        ZeroDivisionError
            If the training set contains no majority-class samples.
        """
        # ________________________________________
        # Initial preparation

        # First call: build the label encoder.  The fitted attribute of
        # LabelEncoder is ``classes_`` (there is no ``classes``).
        if self.classes is None:
            self.label_encoder = LabelEncoder()
            if classes is None:
                self.label_encoder.fit(y)
                self.classes = self.label_encoder.classes_
            else:
                self.label_encoder.fit(classes)
                self.classes = classes

        names_unset = self.minority_name is None or self.majority_name is None

        # Compare by value (``is`` on a str literal tests identity), tolerate
        # ``classes=None`` and pass ``transform`` a sequence, not a scalar.
        if classes is not None and len(classes) >= 2:
            positive_first = classes[0] == "positive"
            if positive_first or classes[1] == "positive":
                encoded = self.label_encoder.transform(list(classes[:2]))
                minority_idx = 0 if positive_first else 1
                self.minority_name = encoded[minority_idx]
                self.majority_name = encoded[1 - minority_idx]

        y = self.label_encoder.transform(y)

        if self.minority_name is None or self.majority_name is None:
            self.minority_name, self.majority_name = minority_majority_name(y)
        if names_unset:
            # Recorded once, on the call that first resolves the class names.
            self.number_of_features = len(X[0])

        # ________________________________________
        # Drift detector

        if self.drift_detector is not None:
            dd_pred = self.drift_detector.predict(X)
            # NOTE(review): predictions are passed first and true labels
            # second - confirm this matches the helper's argument order.
            score = geometric_mean_score(dd_pred, y)
            # With no score history yet there is nothing to compare against,
            # so the score is only recorded (avoids the mean of an empty
            # list, which is NaN).
            has_history = len(self.metrics_array) > 0
            if has_history and score / np.mean(self.metrics_array) < 0.7:
                self.drift_detector = None
                self.metrics_array = []
                self.classifier_array = []
                self.stored_X = []
                self.stored_y = []
            else:
                self.metrics_array.append(score)

        # ________________________________________
        # Get stored data

        new_X, new_y = [], []
        for tmp_X, tmp_y in zip(self.stored_X, self.stored_y):
            new_X.extend(tmp_X)
            new_y.extend(tmp_y)
        new_X.extend(X)
        new_y.extend(y)

        new_X = np.array(new_X)
        new_y = np.array(new_y)

        # ________________________________________
        # Undersample and store new data

        und_X, und_y = self.undersampling.fit_resample(X, y)
        self.stored_X.append(und_X)
        self.stored_y.append(und_y)

        # ________________________________________
        # Oversample when below ratio

        minority, majority = minority_majority_split(new_X, new_y,
                                                     self.minority_name,
                                                     self.majority_name)
        ratio = len(minority) / len(majority)

        if ratio < self.balance_ratio:
            new_X, new_y = self.oversampling.fit_resample(new_X, new_y)

        # ________________________________________
        # Train new classifier

        self.classifier_array.append(
            clone(self.base_classifier).fit(new_X, new_y))
        if len(self.classifier_array) >= self.number_of_classifiers:
            # Drop the oldest member together with its stored chunk.
            del self.classifier_array[0]
            del self.stored_X[0]
            del self.stored_y[0]

        if self.drift_detector is None:
            # One hidden layer of 10 units; ``(10)`` was a plain int, the
            # one-element tuple states the intent explicitly.
            self.drift_detector = MLPClassifier((10,))
        self.drift_detector.partial_fit(new_X, new_y, np.unique(new_y))

        self.iteration += 1