def get_class_votes(self, X, ht):
            """Return the class votes of this node for one instance.

            The raw votes are taken either from the observed class
            distribution or from ``do_naive_bayes_prediction``, depending on
            ``ht.leaf_prediction``. When the product of the vote total and
            the squared error estimate is positive, the votes are divided by
            that product.

            Parameters
            ----------
            X
                Instance forwarded to ``do_naive_bayes_prediction``.
            ht
                Object exposing the ``leaf_prediction`` option.

            Returns
            -------
            dict
                Votes keyed like the underlying distribution.
            """
            prediction_option = ht.leaf_prediction

            # Majority class is used when requested explicitly, or in the
            # remaining (adaptive) mode while it has the larger correct weight.
            use_majority_class = prediction_option == MAJORITY_CLASS or (
                prediction_option != NAIVE_BAYES
                and self._mc_correct_weight > self._nb_correct_weight)

            if use_majority_class:
                dist = self.get_observed_class_distribution()
            else:
                dist = do_naive_bayes_prediction(
                    X, self._observed_class_distribution,
                    self._attribute_observers)

            dist_sum = sum(dist.values())
            error_estimation = self.get_error_estimation()
            normalization_factor = (
                dist_sum * error_estimation * error_estimation)

            if normalization_factor > 0.0:
                # normalize_values_in_dict rewrites its argument. Work on a
                # copy so that a distribution owned by the node (the getter
                # may hand out the stored dictionary) is not rescaled on
                # every prediction.
                dist = dict(dist)
                normalize_values_in_dict(dist, normalization_factor)

            return dist
def test_normalize_values_in_dict():
    """Check normalize_values_in_dict with the default and an explicit factor."""
    import math  # local import: only needed for the tolerant comparison below

    a_dictionary = {k: k * 10 for k in range(10)}

    reference = copy(a_dictionary)
    sum_of_values = sum(a_dictionary.values())

    # Without a factor, every value is expected to be divided by the sum.
    normalize_values_in_dict(a_dictionary)
    for k in a_dictionary:
        assert a_dictionary[k] == reference[k] / sum_of_values

    # Normalizing by 1 / sum should undo the previous step. The round trip
    # goes through two floating-point divisions, so compare with a tolerance
    # instead of requiring bit-identical values.
    normalize_values_in_dict(a_dictionary, factor=1 / sum_of_values)
    for k in a_dictionary:
        assert math.isclose(a_dictionary[k], reference[k])
def test_normalize_values_in_dict():
    """Exercise normalize_values_in_dict in place and with ``inplace=False``."""
    a_dictionary = {k: k * 10 for k in range(1, 11)}

    reference = copy(a_dictionary)
    sum_of_values = sum(a_dictionary.values())

    # Default call: each value becomes its share of the total.
    normalize_values_in_dict(a_dictionary)
    assert all(
        a_dictionary[key] == reference[key] / sum_of_values
        for key in a_dictionary
    )

    # Using 1 / total as the factor brings the original values back
    # (up to floating-point tolerance).
    normalize_values_in_dict(a_dictionary, factor=1 / sum_of_values)
    assert all(
        np.isclose(a_dictionary[key], reference[key])
        for key in a_dictionary
    )

    # With inplace=False the result is a different object holding different
    # values, while the input dictionary keeps its own.
    b_dictionary = normalize_values_in_dict(
        a_dictionary, factor=1 / sum_of_values, inplace=False)
    assert all(
        a_dictionary[key] != b_dictionary[key] for key in a_dictionary
    )
    assert a_dictionary is not b_dictionary
 def predict_proba(self, X):
     """Build one vector of class scores for every sample in X.

     Parameters
     ----------
     X
         Samples; the first value returned by ``get_dimensions(X)`` is used
         as the number of rows and ``X[i]`` as the i-th sample.

     Returns
     -------
     numpy.ndarray
         ``np.array`` of the per-sample results. A sample without votes
         contributes ``[0]``; any other sample contributes a vector indexed
         by ``int(key)`` of its votes.
         NOTE(review): these two row kinds can differ in length - confirm
         callers cope with that.
     """
     n_rows, _ = get_dimensions(X)
     rows = []
     for idx in range(n_rows):
         raw_votes = self.get_votes_for_instance(X[idx]).copy()
         if raw_votes == {}:
             # Tree is empty, all classes equal, default to zero
             rows.append([0])
             continue

         # Flatten the vote dictionaries into a single mapping; when a key
         # occurs more than once, the entry visited last wins.
         # NOTE(review): this iterates raw_votes as a collection of dicts -
         # confirm that matches what get_votes_for_instance returns.
         merged = {key: part[key] for part in raw_votes for key in part}
         if sum(merged.values()) != 0:
             normalize_values_in_dict(merged)

         # Size the vector from the known classes when they are set,
         # otherwise from the largest key that received a vote.
         if self.classes is not None:
             top_label = max(self.classes)
         else:
             top_label = max(merged)
         scores = np.zeros(int(top_label) + 1)
         for label, score in merged.items():
             scores[int(label)] = score
         rows.append(scores)
     return np.array(rows)
    def get_votes_for_instance(self, X):
        """Combine the votes of all ensemble members for one instance.

        Each member's votes are normalized by their sum and, unless
        ``disable_weighted_vote`` is set, multiplied by the member's
        performance (accuracy when ``performance_metric == 'acc'``, kappa
        otherwise). Members returning no votes, or votes whose sum is not
        positive, are skipped.

        The dictionaries returned by the members are only read, never
        modified, so a member that returns its internal state is left
        untouched.

        Parameters
        ----------
        X
            Instance forwarded to every member (and to ``init_ensemble``
            when the ensemble has not been created yet).

        Returns
        -------
        dict
            Sum of the (weighted) normalized votes per key; empty when no
            member contributed.
        """
        if self.ensemble is None:
            self.init_ensemble(X)
        combined_votes = {}

        for i in range(self.n_estimators):
            learner = self.ensemble[i]
            vote = learner.get_votes_for_instance(X)
            if vote == {}:
                continue
            total = sum(vote.values())
            if not total > 0:
                continue

            weight = 1.0
            if not self.disable_weighted_vote:
                performance = learner.evaluator.get_accuracy()\
                    if self.performance_metric == 'acc'\
                    else learner.evaluator.get_kappa()
                # A performance of exactly zero leaves the vote unweighted.
                if performance != 0.0:  # CHECK How to handle negative (kappa) values?
                    weight = performance

            # Accumulate freshly computed values instead of rewriting the
            # learner's dictionary in place.
            for k, value in vote.items():
                contribution = value / total * weight
                combined_votes[k] = combined_votes.get(k, 0.0) + contribution
        return combined_votes