def weighted_sum(self, X):
    """Combine the class votes of every rule that covers the instance.

    Each covering rule contributes its (normalised) class votes to a running
    total, which is itself re-normalised after every contribution.

    Parameters
    ----------
    X: numpy.ndarray of length equal to the number of features.
        Instance attributes.

    Returns
    -------
    dict (class_value, weight)
        The accumulated class distribution of the fired rules, or the votes
        of the default rule when no rule in the rule set covers ``X``.
    """
    any_rule_fired = False
    final_votes = {}
    for rule in self.rule_set:
        if not rule.covers_instance(X):
            continue
        any_rule_fired = True

        # Copy so the rule's own vote dictionary is never touched.
        rule_votes = copy.deepcopy(rule.get_class_votes(X, self))
        if sum(rule_votes.values()) != 0:
            rule_votes = normalize_values_in_dict(rule_votes, inplace=False)

        # Key-wise sum over the union of labels seen so far.
        merged = {}
        for label in set(final_votes) | set(rule_votes):
            merged[label] = final_votes.get(label, 0) + rule_votes.get(label, 0)
        final_votes = merged

        # Keep the running total normalised (skipped for an all-zero total).
        if sum(final_votes.values()) != 0:
            normalize_values_in_dict(final_votes)

    if not any_rule_fired:
        return self.default_rule.get_class_votes(X, self)
    return final_votes
def get_class_votes(self, X, ht):
    """Return the class votes for ``X``, scaled by the error estimation.

    Parameters
    ----------
    X: instance attributes, forwarded to the naive Bayes prediction.
    ht: object exposing ``leaf_prediction`` and the ``_MAJORITY_CLASS`` /
        ``_NAIVE_BAYES`` option constants.

    Returns
    -------
    dict
        The selected class distribution. When
        ``sum(dist) * error_estimation ** 2`` is positive, a copy of the
        distribution normalised by that factor is returned; otherwise the
        distribution is returned as produced.
    """
    option = ht.leaf_prediction
    if option == ht._MAJORITY_CLASS:
        use_majority_class = True
    elif option == ht._NAIVE_BAYES:
        use_majority_class = False
    else:
        # Adaptive option: pick whichever predictor has the larger
        # "correct weight" counter; ties go to naive Bayes.
        use_majority_class = self._mc_correct_weight > self._nb_correct_weight

    if use_majority_class:
        dist = self.get_observed_class_distribution()
    else:
        dist = do_naive_bayes_prediction(
            X, self._observed_class_distribution, self._attribute_observers
        )

    total_weight = sum(dist.values())
    normalization_factor = (
        total_weight * self.get_error_estimation() * self.get_error_estimation()
    )
    if normalization_factor > 0.0:
        return normalize_values_in_dict(dist, normalization_factor, inplace=False)
    return dist
def predict_proba(self, X):
    """Predict the probabilities of all labels for the instance(s) in X.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        Samples for which we want to predict the labels.

    Returns
    -------
    numpy.ndarray
        One row per sample; column ``j`` holds the normalised vote of the
        class whose label converts to ``int`` ``j``. When ``self.classes``
        is set, every row has ``max(self.classes) + 1`` columns. Otherwise
        rows are zero-padded to the widest row so the result is always a
        rectangular 2D array. Samples without any vote yield an all-zero row.
    """
    r, _ = get_dimensions(X)

    # Fix the column count up front when the class labels are known, so that
    # empty-vote rows and populated rows always have the same width.
    if self.classes is not None and len(self.classes) > 0:
        known_width = int(max(self.classes)) + 1
    else:
        known_width = None

    predictions = []
    for i in range(r):
        votes = copy.deepcopy(self.get_votes_for_instance(X[i]))
        if votes == {}:
            # No votes available: all classes equal, default to zero.
            predictions.append(np.zeros(known_width if known_width is not None else 1))
            continue
        if sum(votes.values()) != 0:
            votes = normalize_values_in_dict(votes, inplace=False)
        if known_width is not None:
            y_proba = np.zeros(known_width)
        else:
            y_proba = np.zeros(int(max(votes.keys())) + 1)
        for key, value in votes.items():
            y_proba[int(key)] = value
        predictions.append(y_proba)

    if not predictions:
        return np.array(predictions)

    # Zero-pad to a common width: rows can differ in length when the classes
    # are unknown, and a ragged list cannot be turned into a 2D array.
    n_columns = max(len(row) for row in predictions)
    result = np.zeros((len(predictions), n_columns))
    for row_index, row in enumerate(predictions):
        result[row_index, :len(row)] = row
    return result
def weighted_max(self, X):
    """Return the class votes of the covering rule holding the largest vote.

    Parameters
    ----------
    X: numpy.ndarray of length equal to the number of features.
        Instance attributes.

    Returns
    -------
    dict (class_value, weight)
        The (normalised) votes of the covering rule that contains the highest
        single weight; on ties the later rule wins. The default rule's votes
        are returned when no covering rule takes over.
    """
    best_weight = 0
    # Fallback used when no covering rule replaces it below.
    winning_votes = self.default_rule.get_class_votes(X, self)

    for rule in self.rule_set:
        if not rule.covers_instance(X):
            continue
        candidate = copy.deepcopy(rule.get_class_votes(X, self))
        if sum(candidate.values()) != 0:
            candidate = normalize_values_in_dict(candidate, inplace=False)
        for weight in candidate.values():
            # ">=" on purpose: an equal weight from a later rule takes over.
            if weight >= best_weight:
                best_weight, winning_votes = weight, candidate

    return winning_votes
def get_votes_for_instance(self, X):
    """Combine the accuracy-weighted votes of the ensemble members for X.

    The ensemble is created lazily (``self.s`` members) on first use. Each
    member's votes are normalised, scaled by that member's accuracy
    (``correct / prediction``) when it is non-zero, and summed per class.

    Parameters
    ----------
    X: a single instance, forwarded to each member's ``instance_votes``.

    Returns
    -------
    dict
        Mapping class label -> combined weight. Members with empty or
        non-positive total votes contribute nothing.
    """
    if self.ensemble is None:
        self.ensemble = [self._init_ensemble_member() for _ in range(self.s)]

    combined_votes = {}
    for i in range(self.s):
        member = self.ensemble[i]
        vote = deepcopy(member.instance_votes(X))
        if vote != {} and sum(vote.values()) > 0:
            vote = normalize_values_in_dict(vote, inplace=False)

            # The counters live on the individual member; reading them from
            # the ensemble list itself would raise AttributeError.
            if member.prediction != 0:
                accuracy = member.correct / member.prediction
            else:
                accuracy = 0

            # A zero accuracy leaves the normalised votes unscaled.
            if accuracy != 0.0:
                for k in vote:
                    vote[k] = vote[k] * accuracy

            # Add values
            for k in vote:
                combined_votes[k] = combined_votes.get(k, 0) + vote[k]
    return combined_votes
def predict_proba(self, X):
    """Estimate the probability of each sample in X belonging to each class.

    The combined votes returned by ``get_votes_for_instance`` are normalised
    per sample and laid out as one row per sample.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        Samples for which we want to predict the class probabilities.

    Returns
    -------
    numpy.ndarray
        One row per sample; column ``j`` holds the probability of the class
        whose label converts to ``int`` ``j``. When ``self.classes`` is set,
        every row (including rows for samples without votes) has
        ``max(self.classes) + 1`` columns. Otherwise class labels are assumed
        to be 0-indexed and shorter rows are zero-filled. Rows sum to 1
        whenever the votes for the sample have a non-zero sum; samples
        without votes yield zeros.
    """
    if self.ensemble is None:
        self._init_ensemble(X)
    r, _ = get_dimensions(X)

    # One width for all rows when classes are known. Empty-vote rows used to
    # be len(classes) wide while populated rows were max(classes) + 1 wide,
    # which produced ragged output for non-contiguous labels.
    classes_known = self.classes is not None and len(self.classes) > 0
    n_columns = int(max(self.classes)) + 1 if classes_known else None

    y_proba = []
    for i in range(r):
        votes = deepcopy(self.get_votes_for_instance(X[i]))
        if votes == {}:
            # Estimator is empty, all classes equal, default to zero
            y_proba.append(np.zeros(n_columns) if classes_known else [0])
            continue
        if sum(votes.values()) != 0:
            votes = normalize_values_in_dict(votes)
        if classes_known:
            votes_array = np.zeros(n_columns)
        else:
            votes_array = np.zeros(int(max(votes.keys())) + 1)
        for key, value in votes.items():
            votes_array[int(key)] = value
        y_proba.append(votes_array)

    if classes_known:
        return np.asarray(y_proba)
    # Fill missing values related to unobserved classes to ensure we get a
    # 2D array.
    return np.asarray(list(itertools.zip_longest(*y_proba, fillvalue=0.0))).T
def get_votes_for_instance(self, X):
    """Combine the (optionally performance-weighted) member votes for X.

    The ensemble is initialised on first use. Each member's votes are
    normalised; unless ``self.disable_weighted_vote`` is set they are then
    scaled by the member's accuracy (``performance_metric == 'acc'``) or
    kappa (any other metric value) when that value is non-zero. The results
    are summed per class.

    Parameters
    ----------
    X: a single instance, forwarded to each member.

    Returns
    -------
    dict
        Mapping class label -> combined weight. Members with empty or
        non-positive total votes contribute nothing.
    """
    if self.ensemble is None:
        self.init_ensemble(X)

    combined_votes = {}
    for i in range(self.n_estimators):
        member = self.ensemble[i]
        vote = member.get_votes_for_instance(X)
        if vote != {} and sum(vote.values()) > 0:
            # Work on a private copy: the normalisation and scaling below
            # are in place and must not alter the dict owned by the member.
            vote = dict(vote)
            normalize_values_in_dict(vote)
            if not self.disable_weighted_vote:
                if self.performance_metric == 'acc':
                    performance = member.evaluator.get_accuracy()
                else:
                    performance = member.evaluator.get_kappa()
                if performance != 0.0:  # CHECK How to handle negative (kappa) values?
                    for k in vote:
                        vote[k] = vote[k] * performance
            # Add values
            for k in vote:
                combined_votes[k] = combined_votes.get(k, 0) + vote[k]
    return combined_votes
def get_votes_for_instance(self, X):
    """Combine the accuracy-weighted votes of all ensemble members for X.

    As a side effect ``self.estimators_votes`` is reset and then filled with
    the ``predict_proba([X])`` output of every member that offers
    ``predict_proba``, concatenated along axis 1 (it stays ``None`` when no
    member does).

    Parameters
    ----------
    X: a single instance, forwarded to each member.

    Returns
    -------
    dict
        Mapping class label -> sum over members of the normalised votes,
        each scaled by the member's ``evaluator.accuracy_score()`` when that
        score is non-zero. Members with empty or non-positive total votes
        contribute nothing.
    """
    combined_votes = {}
    self.estimators_votes = None
    for member in self.ensemble:
        vote = cp.deepcopy(member.get_votes_for_instance(X))

        if hasattr(member, 'predict_proba'):
            member_distribution = member.predict_proba([X])
            if self.estimators_votes is None:
                self.estimators_votes = member_distribution
            else:
                self.estimators_votes = np.concatenate(
                    (self.estimators_votes, member_distribution), axis=1
                )

        if vote != {} and sum(vote.values()) > 0:
            vote = normalize_values_in_dict(vote, inplace=True)
            performance = member.evaluator.accuracy_score()
            if performance != 0.0:
                # Weight the votes by the performance of this member.
                for k in vote:
                    vote[k] = vote[k] * performance
            # Combine the result predicted by each member for this instance.
            # (The former per-class scratch dict was always replaced by
            # ``vote`` before being read, and its ``if self.classes:`` guard
            # fails on multi-element numpy arrays, so both are gone.)
            for k in vote:
                combined_votes[k] = combined_votes.get(k, 0) + vote[k]
    return combined_votes
def predict_proba(self, X):
    """Estimate per-class probabilities for every sample in X.

    The combined votes returned by ``get_votes_for_instance`` are normalised
    per sample and laid out as one row per sample.

    Parameters
    ----------
    X: samples; its row count is obtained through ``get_dimensions``.

    Returns
    -------
    numpy.ndarray
        One row per sample; column ``j`` holds the probability of the class
        whose label converts to ``int`` ``j``. When ``self.classes`` is set
        and non-empty every row has ``max(self.classes) + 1`` columns;
        otherwise shorter rows are zero-filled to the widest row. Samples
        without votes yield zeros.
    """
    r, _ = get_dimensions(X)

    # Explicit None/length test: truth-testing a multi-element numpy array
    # raises ValueError. A single width keeps empty-vote and populated rows
    # aligned.
    classes_known = self.classes is not None and len(self.classes) > 0
    n_columns = int(max(self.classes)) + 1 if classes_known else None

    y_proba = []
    for i in range(r):
        # Combined votes of the ensemble for the current instance.
        votes = cp.deepcopy(self.get_votes_for_instance(X[i]))
        if votes == {}:
            y_proba.append(np.zeros(n_columns) if classes_known else [0])
            continue
        if sum(votes.values()) != 0:
            # Normalise the votes by dividing each vote by the sum of all votes.
            votes = normalize_values_in_dict(votes)
        if classes_known:
            votes_array = np.zeros(n_columns)
        else:
            votes_array = np.zeros(int(max(votes.keys())) + 1)
        for key, value in votes.items():
            try:
                votes_array[int(key)] = value
            except (IndexError, TypeError, ValueError):
                # Best effort: a label that is not an integer index into the
                # row is reported and skipped; other errors now propagate.
                print('this is not ok ')
        y_proba.append(votes_array)

    if classes_known:
        return np.asarray(y_proba)
    # Zero-fill rows of unequal length so the result is a 2D array.
    return np.asarray(list(itertools.zip_longest(*y_proba, fillvalue=0.0))).T
def predict_proba(self, X):
    """Predict the probabilities of all labels for the instance(s) in X.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        Samples for which we want to predict the labels.

    Returns
    -------
    numpy.ndarray
        One row per sample; column ``j`` holds the normalised vote of the
        class whose label converts to ``int`` ``j``. When ``self.classes``
        is set and non-empty every row has ``max(self.classes) + 1`` columns;
        otherwise shorter rows are zero-filled to the widest row. Samples
        without votes yield zeros.
    """
    r, _ = get_dimensions(X)

    # One width for every row when classes are known, so that an empty-vote
    # row does not make the stacked result ragged.
    classes_known = self.classes is not None and len(self.classes) > 0
    n_columns = int(max(self.classes)) + 1 if classes_known else None

    predictions = []
    for i in range(r):
        votes = copy.deepcopy(self.get_votes_for_instance(X[i]))
        if votes == {}:
            # Tree is empty, all classes equal, default to zero
            predictions.append(np.zeros(n_columns) if classes_known else [0])
            continue
        if sum(votes.values()) != 0:
            votes = normalize_values_in_dict(votes, inplace=False)
        if classes_known:
            y_proba = np.zeros(n_columns)
        else:
            y_proba = np.zeros(int(max(votes.keys())) + 1)
        for key, value in votes.items():
            y_proba[int(key)] = value
        predictions.append(y_proba)

    # Set result as np.array
    if classes_known:
        return np.asarray(predictions)
    # Fill missing values related to unobserved classes to ensure we get a
    # 2D array.
    return np.asarray(list(itertools.zip_longest(*predictions, fillvalue=0.0))).T
def predict_one(self, X, *, tree=None):
    """Return the class distribution for ``X``, scaled by the error estimation.

    Parameters
    ----------
    X: instance attributes, forwarded to the underlying predictor.
    tree: object exposing ``leaf_prediction`` and the ``_MAJORITY_CLASS`` /
        ``_NAIVE_BAYES`` option constants. It is dereferenced immediately,
        so the ``None`` default is not usable as-is.

    Returns
    -------
    dict
        ``self.stats`` for the majority-class option, the naive Bayes
        prediction for the naive Bayes option, and the parent class'
        ``predict_one`` result for any other option. When
        ``sum(dist) * error_estimation ** 2`` is positive, a copy normalised
        by that factor is returned instead.
    """
    option = tree.leaf_prediction
    if option == tree._MAJORITY_CLASS:
        dist = self.stats
    elif option == tree._NAIVE_BAYES:
        dist = do_naive_bayes_prediction(X, self.stats, self.attribute_observers)
    else:
        # Any other option (labelled "NBAdaptive (default)" by the original
        # author) is delegated to the parent implementation.
        dist = super().predict_one(X, tree=tree)

    total_weight = sum(dist.values())
    normalization_factor = total_weight * self.error_estimation * self.error_estimation
    if normalization_factor > 0.0:
        return normalize_values_in_dict(dist, normalization_factor, inplace=False)
    return dist