def get_class_votes(self, X, ht):
    """Return the class-vote distribution for instance ``X`` at this leaf.

    The tree's configured leaf-prediction strategy (``ht.leaf_prediction``)
    selects the source of the votes:

    * ``MAJORITY_CLASS`` -- observed class distribution at the leaf;
    * ``NAIVE_BAYES`` -- naive Bayes prediction from the attribute observers;
    * otherwise (NB-adaptive) -- whichever of the two has accumulated the
      higher correct-prediction weight so far.

    The resulting distribution is then normalized by
    ``sum(votes) * error_estimation**2`` when that factor is positive.
    """
    strategy = ht.leaf_prediction
    if strategy == MAJORITY_CLASS:
        dist = self.get_observed_class_distribution()
    elif strategy == NAIVE_BAYES:
        dist = do_naive_bayes_prediction(
            X, self._observed_class_distribution, self._attribute_observers)
    elif self._mc_correct_weight > self._nb_correct_weight:
        # NB-adaptive: majority class has been the better predictor so far.
        dist = self.get_observed_class_distribution()
    else:
        # NB-adaptive: naive Bayes has been the better predictor so far.
        dist = do_naive_bayes_prediction(
            X, self._observed_class_distribution, self._attribute_observers)

    total = sum(dist.values())
    # Scale by the squared leaf error estimate (kept as two calls, exactly
    # as the original did).
    normalization_factor = total * self.get_error_estimation() \
        * self.get_error_estimation()
    if normalization_factor > 0.0:
        normalize_values_in_dict(dist, normalization_factor)
    return dist
def test_normalize_values_in_dict():
    """Check in-place normalization of a dict's values.

    NOTE(review): a second, newer ``test_normalize_values_in_dict`` exists
    later in this file and shadows this one at import time; they should be
    consolidated.

    Fix: the original asserted *exact* float equality after a
    divide-then-multiply round trip (``v / s`` then dividing by factor
    ``1 / s``), which is not guaranteed to be bit-exact in IEEE-754
    arithmetic and makes the test flaky. Comparisons now use a small
    absolute tolerance instead.
    """
    a_dictionary = {}
    for k in range(10):
        a_dictionary[k] = k * 10
    reference = copy(a_dictionary)
    sum_of_values = sum(a_dictionary.values())

    # Default behaviour: divide every value by the sum of all values.
    normalize_values_in_dict(a_dictionary)
    for k in a_dictionary:
        assert abs(a_dictionary[k] - reference[k] / sum_of_values) < 1e-9

    # Dividing by factor 1/sum multiplies back by the sum, restoring the
    # original values (up to floating-point rounding).
    normalize_values_in_dict(a_dictionary, factor=1 / sum_of_values)
    for k in a_dictionary:
        assert abs(a_dictionary[k] - reference[k]) < 1e-9
def test_normalize_values_in_dict():
    """Exercise normalize_values_in_dict: default sum-normalization,
    an explicit ``factor``, and the ``inplace=False`` copy semantics."""
    a_dictionary = {key: key * 10 for key in range(1, 11)}
    reference = copy(a_dictionary)
    total = sum(a_dictionary.values())

    # Default behaviour: each value is divided by the sum of all values.
    normalize_values_in_dict(a_dictionary)
    for key in a_dictionary:
        assert a_dictionary[key] == reference[key] / total

    # Dividing by factor 1/total multiplies back by the sum, restoring the
    # originals up to floating-point rounding.
    normalize_values_in_dict(a_dictionary, factor=1 / total)
    for key in a_dictionary:
        assert np.isclose(a_dictionary[key], reference[key])

    # inplace=False must return a new dict with rescaled values and leave
    # the input untouched.
    b_dictionary = normalize_values_in_dict(a_dictionary, factor=1 / total,
                                            inplace=False)
    for key in a_dictionary:
        assert a_dictionary[key] != b_dictionary[key]
    assert a_dictionary is not b_dictionary
def predict_proba(self, X):
    """Estimate class-membership probabilities for each sample in ``X``.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Samples to predict on.

    Returns
    -------
    numpy.ndarray
        One row of per-class probabilities per sample, indexed by class
        label. A sample whose tree yields no votes contributes ``[0]``
        (kept for backward compatibility; note this can make the result
        ragged).

    Fix: the original built
    ``new_votes = dict((key, d[key]) for d in votes for key in d)``,
    iterating the ``votes`` *dict* as if it were a list of dicts -- ``d``
    is actually a key, so this raises ``TypeError`` for numeric keys (or
    silently corrupts for string keys). ``votes`` is already a copied
    dict and is now used directly.
    """
    r, _ = get_dimensions(X)
    predictions = []
    for i in range(r):
        # .copy() so normalization below cannot mutate the tree's own dict.
        votes = self.get_votes_for_instance(X[i]).copy()
        if votes == {}:
            # Tree is empty, all classes equal, default to zero.
            predictions.append([0])
        else:
            if sum(votes.values()) != 0:
                normalize_values_in_dict(votes)
            # Size the probability vector from the known classes when
            # available, otherwise from the largest voted label.
            if self.classes is not None:
                y_proba = np.zeros(int(max(self.classes)) + 1)
            else:
                y_proba = np.zeros(int(max(votes.keys())) + 1)
            for key, value in votes.items():
                y_proba[int(key)] = value
            predictions.append(y_proba)
    return np.array(predictions)
def get_votes_for_instance(self, X):
    """Aggregate the (optionally performance-weighted) votes of every
    ensemble member for instance ``X`` into a single class->weight dict.

    Initializes the ensemble lazily on first use.
    """
    if self.ensemble is None:
        self.init_ensemble(X)

    combined_votes = {}
    for idx in range(self.n_estimators):
        member = self.ensemble[idx]
        vote = member.get_votes_for_instance(X)
        # Skip members with no votes or a non-positive total.
        if not vote or sum(vote.values()) <= 0:
            continue
        normalize_values_in_dict(vote)
        if not self.disable_weighted_vote:
            if self.performance_metric == 'acc':
                performance = member.evaluator.get_accuracy()
            else:
                performance = member.evaluator.get_kappa()
            # CHECK How to handle negative (kappa) values?
            if performance != 0.0:
                for label in vote:
                    vote[label] = vote[label] * performance
        # Accumulate this member's weighted votes into the running totals.
        for label, weight in vote.items():
            combined_votes[label] = combined_votes.get(label, 0) + weight
    return combined_votes