def weighted_sum(self, X):
        """Accumulate the class votes of every rule that covers the instance.

        Each covering rule contributes its vote distribution, normalized
        first when its total is non-zero. After every contribution the
        running total is normalized again (when its total is non-zero).

        Parameters
        ----------
        X: numpy.ndarray of length equal to the number of features.
            Instance attributes.

        Returns
        -------
        dict (class_value, weight)
            The class distribution accumulated from the fired rules, or the
            class votes of the default rule when no rule covers the instance.

        """
        accumulated = {}
        any_rule_fired = False
        for candidate in self.rule_set:
            if not candidate.covers_instance(X):
                continue
            any_rule_fired = True
            rule_votes = copy.deepcopy(candidate.get_class_votes(X, self))
            if sum(rule_votes.values()) != 0:
                rule_votes = normalize_values_in_dict(rule_votes, inplace=False)
            # Add the two distributions label by label; a label missing on
            # either side counts as 0.
            labels = set(accumulated) | set(rule_votes)
            accumulated = {
                label: accumulated.get(label, 0) + rule_votes.get(label, 0)
                for label in labels
            }
            if sum(accumulated.values()) != 0:
                # The return value is ignored, so this relies on the helper
                # updating the dictionary in place.
                normalize_values_in_dict(accumulated)

        if any_rule_fired:
            return accumulated
        return self.default_rule.get_class_votes(X, self)
示例#2
0
    def get_class_votes(self, X, ht):
        """Return the class distribution of this node, scaled by its error estimate.

        The prediction strategy is read from ``ht.leaf_prediction``:

        * majority class: the observed class distribution is used;
        * naive Bayes: ``do_naive_bayes_prediction`` is used;
        * anything else (adaptive): the observed class distribution is used
          when ``_mc_correct_weight`` is greater than ``_nb_correct_weight``,
          otherwise naive Bayes is used.

        Parameters
        ----------
        X: instance attributes, forwarded to the naive Bayes helper.
        ht: object exposing ``leaf_prediction``, ``_MAJORITY_CLASS`` and
            ``_NAIVE_BAYES``.

        Returns
        -------
        dict
            The chosen distribution. When the sum of its values times the
            squared error estimation is positive, the result of
            ``normalize_values_in_dict`` with that product as factor
            (``inplace=False``) is returned instead.
        """
        mode = ht.leaf_prediction
        if mode == ht._MAJORITY_CLASS:
            use_naive_bayes = False
        elif mode == ht._NAIVE_BAYES:
            use_naive_bayes = True
        else:
            # Adaptive: keep majority class only while it is strictly ahead.
            use_naive_bayes = not (self._mc_correct_weight > self._nb_correct_weight)

        if use_naive_bayes:
            class_dist = do_naive_bayes_prediction(
                X, self._observed_class_distribution, self._attribute_observers
            )
        else:
            class_dist = self.get_observed_class_distribution()

        total = sum(class_dist.values())
        scale = total * self.get_error_estimation() * self.get_error_estimation()
        if not scale > 0.0:
            return class_dist
        return normalize_values_in_dict(class_dist, scale, inplace=False)
    def predict_proba(self, X):
        """Predict the probability of every label for the instance(s) in X.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            Samples for which we want to predict the labels.

        Returns
        -------
        numpy.array
            One row per sample. A row is indexed by ``int(label)``; its width
            is ``int(max(self.classes)) + 1`` when ``self.classes`` is set,
            otherwise ``int(max(label)) + 1`` over the labels voted for that
            sample. A sample without any votes yields the row ``[0]``.
            When rows end up with different widths, shorter rows are padded
            with zeros on the right so that the result is always 2D.

        """
        r, _ = get_dimensions(X)
        predictions = []
        for i in range(r):
            votes = copy.deepcopy(self.get_votes_for_instance(X[i]))
            if votes == {}:
                # Tree is empty, all classes equal, default to zero
                predictions.append([0])
                continue
            if sum(votes.values()) != 0:
                votes = normalize_values_in_dict(votes, inplace=False)
            if self.classes is not None:
                n_columns = int(max(self.classes)) + 1
            else:
                n_columns = int(max(votes.keys())) + 1
            y_proba = np.zeros(n_columns)
            for key, value in votes.items():
                y_proba[int(key)] = value
            predictions.append(y_proba)

        # Uniform rows (or no rows at all): same conversion as before.
        if len({len(row) for row in predictions}) <= 1:
            return np.array(predictions)

        # Rows of different widths cannot be stacked directly (NumPy rejects
        # ragged input), so right-pad the shorter ones with zeros.
        widest = max(len(row) for row in predictions)
        padded = np.zeros((len(predictions), widest))
        for row_index, row in enumerate(predictions):
            padded[row_index, :len(row)] = row
        return padded
    def weighted_max(self, X):
        """Pick the class votes of the covering rule holding the largest vote weight.

        The default rule's votes are computed up front and act as the
        fallback. Every covering rule's votes are normalized (when their total
        is non-zero) and scanned value by value; whenever a value is greater
        than or equal to the best weight seen so far (starting at 0), that
        rule's votes become the result. Because the comparison is ``>=``, a
        later rule wins ties.

        Parameters
        ----------
        X: numpy.ndarray of length equal to the number of features.
            Instance attributes.

        Returns
        -------
        dict (class_value, weight)
            The class distribution of the rule with the highest weight, or the
            default rule's class votes when no covering rule took over.

        """
        best_weight = 0
        winner = self.default_rule.get_class_votes(X, self)
        for candidate in self.rule_set:
            if not candidate.covers_instance(X):
                continue
            rule_votes = copy.deepcopy(candidate.get_class_votes(X, self))
            if sum(rule_votes.values()) != 0:
                rule_votes = normalize_values_in_dict(rule_votes, inplace=False)
            for weight in rule_votes.values():
                if weight >= best_weight:
                    best_weight, winner = weight, rule_votes

        return winner
    def get_votes_for_instance(self, X):
        """Combine the accuracy-weighted votes of the ensemble members for one instance.

        The ensemble is created lazily (``self.s`` members built with
        ``self._init_ensemble_member()``) when it does not exist yet. Each
        member's votes are ignored when empty or when their total is not
        positive; otherwise they are normalized, multiplied by the member's
        accuracy (``correct / prediction``, skipped when that accuracy is 0)
        and summed per class.

        Parameters
        ----------
        X: the instance forwarded to each member's ``instance_votes``.

        Returns
        -------
        dict
            Class label -> combined vote weight. Empty when no member
            produced usable votes.
        """
        if self.ensemble is None:
            self.ensemble = [
                self._init_ensemble_member() for _ in range(self.s)
            ]
        combined_votes = {}

        for i in range(self.s):
            member = self.ensemble[i]
            vote = deepcopy(member.instance_votes(X))
            if vote == {} or not sum(vote.values()) > 0:
                continue
            vote = normalize_values_in_dict(vote, inplace=False)

            # The counters live on each member; the ensemble itself is a list
            # and has no `prediction` attribute.
            if member.prediction != 0:
                accuracy = member.correct / member.prediction
            else:
                accuracy = 0
            if accuracy != 0.0:
                vote = {k: weight * accuracy for k, weight in vote.items()}

            # Add values
            for k, weight in vote.items():
                combined_votes[k] = combined_votes.get(k, 0) + weight
        return combined_votes
示例#6
0
    def predict_proba(self, X):
        """Estimate, for each sample in X, the probability of every class label.

        The combined votes returned by ``get_votes_for_instance`` are
        normalized (when their total is non-zero) and laid out in an array
        indexed by ``int(label)``.

        Parameters
        ----------
         X: numpy.ndarray of shape (n_samples, n_features)
            Samples for which we want to predict the class probabilities.

        Returns
        -------
        numpy.ndarray
            One row per sample.
            With ``self.classes`` set, a voted row has
            ``int(max(self.classes)) + 1`` columns and a row without votes is
            ``len(self.classes)`` zeros.
            Without ``self.classes``, labels are treated as 0-based indices,
            a row without votes is ``[0]``, and shorter rows are padded with
            0.0 so that a 2D array is returned.
        """
        if self.ensemble is None:
            self._init_ensemble(X)

        n_samples, _ = get_dimensions(X)
        rows = []
        for sample_idx in range(n_samples):
            votes = deepcopy(self.get_votes_for_instance(X[sample_idx]))
            if votes == {}:
                # No estimator voted: every class defaults to zero.
                if self.classes is not None:
                    rows.append(np.zeros(len(self.classes)))
                else:
                    rows.append([0])
                continue

            if sum(votes.values()) != 0:
                votes = normalize_values_in_dict(votes)
            if self.classes is not None:
                highest_label = max(self.classes)
            else:
                highest_label = max(votes.keys())
            row = np.zeros(int(highest_label) + 1)
            for label, share in votes.items():
                row[int(label)] = share
            rows.append(row)

        if self.classes is not None:
            return np.asarray(rows)

        # Rows may differ in length when classes are unknown; transpose-pad
        # them with zeros so the result is a 2D array.
        padded_columns = list(itertools.zip_longest(*rows, fillvalue=0.0))
        return np.asarray(padded_columns).T
    def get_votes_for_instance(self, X):
        """Combine the (optionally performance-weighted) votes of all estimators.

        The ensemble is initialised through ``self.init_ensemble(X)`` when it
        does not exist yet. Votes of a member are ignored when empty or when
        their total is not positive. Otherwise they are normalized and, unless
        ``self.disable_weighted_vote`` is set, multiplied by the member's
        accuracy (``performance_metric == 'acc'``) or kappa (any other
        value); a performance of exactly 0 leaves the votes unweighted.

        The dictionary returned by a member is never modified: normalization
        and weighting are applied to a copy.

        Parameters
        ----------
        X: the instance forwarded to each member's ``get_votes_for_instance``.

        Returns
        -------
        dict
            Class label -> combined vote weight. Empty when no member
            produced usable votes.
        """
        if self.ensemble is None:
            self.init_ensemble(X)
        combined_votes = {}

        for i in range(self.n_estimators):
            member = self.ensemble[i]
            vote = member.get_votes_for_instance(X)
            if vote == {} or not sum(vote.values()) > 0:
                continue

            # `inplace=False` (as used by the sibling voting methods) keeps
            # the member's own dictionary untouched; normalizing it in place
            # would alter whatever object the member handed out.
            vote = normalize_values_in_dict(vote, inplace=False)

            if not self.disable_weighted_vote:
                if self.performance_metric == 'acc':
                    performance = member.evaluator.get_accuracy()
                else:
                    performance = member.evaluator.get_kappa()
                # NOTE: negative (kappa) values are applied as-is; how they
                # should be handled is still an open question.
                if performance != 0.0:
                    vote = {k: weight * performance for k, weight in vote.items()}

            # Add values
            for k, weight in vote.items():
                combined_votes[k] = combined_votes.get(k, 0) + weight
        return combined_votes
    def get_votes_for_instance(self, X):
        """Combine the accuracy-weighted votes of all ensemble members for one instance.

        Side effect: ``self.estimators_votes`` is reset to ``None`` and then
        rebuilt by concatenating, along axis 1, the ``predict_proba([X])``
        output of every member that offers ``predict_proba``. Members without
        that method contribute nothing to ``self.estimators_votes``.

        Votes of a member are ignored when empty or when their total is not
        positive. Otherwise they are normalized, multiplied by the member's
        ``evaluator.accuracy_score()`` (skipped when that score is 0) and
        summed per class.

        Parameters
        ----------
        X: the instance forwarded to each member.

        Returns
        -------
        dict
            Class label -> combined vote weight. Empty when no member
            produced usable votes.
        """
        combined_votes = {}
        self.estimators_votes = None

        for member in self.ensemble:
            vote = cp.deepcopy(member.get_votes_for_instance(X))

            # Only members that can produce a distribution are recorded;
            # otherwise there is nothing valid to concatenate for them.
            if hasattr(member, 'predict_proba'):
                member_distribution = member.predict_proba([X])
                if self.estimators_votes is None:
                    self.estimators_votes = member_distribution
                else:
                    self.estimators_votes = np.concatenate(
                        (self.estimators_votes, member_distribution),
                        axis=1
                    )

            if vote == {} or not sum(vote.values()) > 0:
                continue

            vote = normalize_values_in_dict(vote, inplace=True)

            # Weight the votes by the performance of this ensemble member.
            performance = member.evaluator.accuracy_score()
            if performance != 0.0:
                for k in vote:
                    vote[k] = vote[k] * performance

            # Combine the result predicted by each classifier for the instance.
            for k, weight in vote.items():
                combined_votes[k] = combined_votes.get(k, 0) + weight
        return combined_votes
    def predict_proba(self, X):
        """Estimate, for each sample in X, the probability of every class label.

        The combined votes returned by ``get_votes_for_instance`` are
        normalized (when their total is non-zero) and laid out in an array
        indexed by ``int(label)``.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            Samples for which we want to predict the class probabilities.

        Returns
        -------
        numpy.ndarray
            One row per sample.
            With a non-empty ``self.classes``, a voted row has
            ``int(max(self.classes)) + 1`` columns and a row without votes is
            ``len(self.classes)`` zeros.
            Without ``self.classes``, a row without votes is ``[0]`` and
            shorter rows are padded with 0.0 so that a 2D array is returned.
        """
        r, _ = get_dimensions(X)
        y_proba = []
        for i in range(r):
            # Combined votes of the ensemble for the current instance.
            votes = cp.deepcopy(self.get_votes_for_instance(X[i]))

            if votes == {}:
                # `len(...) > 0` instead of plain truthiness: the truth value
                # of a multi-element numpy array is ambiguous and raises.
                if self.classes is not None and len(self.classes) > 0:
                    y_proba.append([0] * len(self.classes))
                else:
                    y_proba.append([0])
                continue

            if sum(votes.values()) != 0:
                # Divide each vote by the sum of all the votes.
                votes = normalize_values_in_dict(votes)

            if self.classes is not None:
                votes_array = np.zeros(int(max(self.classes)) + 1)
            else:
                votes_array = np.zeros(int(max(votes.keys())) + 1)
            for key, value in votes.items():
                try:
                    votes_array[int(key)] = value
                except (IndexError, TypeError, ValueError):
                    # Best effort: a label that cannot be used as an index of
                    # the row is reported and skipped. Unrelated errors are no
                    # longer swallowed.
                    print('this is not ok ')
            y_proba.append(votes_array)

        if self.classes is not None:
            return np.asarray(y_proba)
        # Rows may differ in length when classes are unknown; pad with zeros
        # to obtain a 2D array.
        return np.asarray(list(itertools.zip_longest(*y_proba, fillvalue=0.0))).T
示例#10
0
    def predict_proba(self, X):
        """ Predicts probabilities of all label of the X instance(s)

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            Samples for which we want to predict the labels.

        Returns
        -------
        numpy.array
            One row per sample. A row is indexed by ``int(label)``; its width
            is ``int(max(self.classes)) + 1`` when ``self.classes`` is set,
            otherwise ``int(max(label)) + 1`` over the labels voted for that
            sample. A sample without any votes yields the row ``[0]``.
            Rows of different widths are padded with 0.0 on the right so that
            a 2D array is returned.

        """
        r, _ = get_dimensions(X)
        predictions = []
        for i in range(r):
            votes = copy.deepcopy(self.get_votes_for_instance(X[i]))
            if votes == {}:
                # Tree is empty, all classes equal, default to zero
                predictions.append([0])
                continue
            if sum(votes.values()) != 0:
                votes = normalize_values_in_dict(votes, inplace=False)
            if self.classes is not None:
                y_proba = np.zeros(int(max(self.classes)) + 1)
            else:
                y_proba = np.zeros(int(max(votes.keys())) + 1)
            for key, value in votes.items():
                y_proba[int(key)] = value
            predictions.append(y_proba)

        # With known classes the rows normally share one width and can be
        # stacked directly. A `[0]` placeholder next to full-width rows makes
        # the list ragged, so that case takes the padding path as well.
        row_widths = {len(row) for row in predictions}
        if self.classes is not None and len(row_widths) <= 1:
            return np.asarray(predictions)

        # Fill missing values related to unobserved classes to ensure we get a 2D array
        return np.asarray(
            list(itertools.zip_longest(*predictions, fillvalue=0.0))).T
示例#11
0
    def predict_one(self, X, *, tree=None):
        """Return the class distribution of this node, scaled by its error estimate.

        The prediction strategy is read from ``tree.leaf_prediction``:

        * majority class: ``self.stats`` is used as is;
        * naive Bayes: ``do_naive_bayes_prediction`` is used;
        * anything else (adaptive): the parent class's ``predict_one`` decides.

        Parameters
        ----------
        X: instance attributes.
        tree: object exposing ``leaf_prediction``, ``_MAJORITY_CLASS`` and
            ``_NAIVE_BAYES`` (keyword-only). It is dereferenced immediately,
            so the ``None`` default is not usable in practice.

        Returns
        -------
        dict
            The chosen distribution. When the sum of its values times the
            squared ``error_estimation`` is positive, the result of
            ``normalize_values_in_dict`` with that product as factor
            (``inplace=False``) is returned instead.
        """
        mode = tree.leaf_prediction
        if mode == tree._MAJORITY_CLASS:
            class_dist = self.stats
        elif mode == tree._NAIVE_BAYES:
            class_dist = do_naive_bayes_prediction(
                X, self.stats, self.attribute_observers
            )
        else:
            # Adaptive naive Bayes (default) is delegated to the parent class.
            class_dist = super().predict_one(X, tree=tree)

        total = sum(class_dist.values())
        scale = total * self.error_estimation * self.error_estimation
        if not scale > 0.0:
            return class_dist
        return normalize_values_in_dict(class_dist, scale, inplace=False)