def normalize_sample(self, X):
    """Scale a single sample so every feature has comparable influence.

    Each non-nominal feature is centred on its running mean and divided by
    three times its running standard deviation. One extra trailing entry is
    appended that is multiplied with the constant (bias) factor.

    Parameters
    ----------
    X: list or array or numpy.ndarray
        Feature values of one sample.

    Returns
    -------
    numpy.ndarray
        Array of length ``len(X) + 1``. Features listed in
        ``self._nominal_attributes`` and features whose standard deviation
        is not positive map to 0.0; so does every feature when
        ``self._nominal_attributes`` is None. The trailing entry is 1.0
        once more than one sample has been seen, otherwise the whole
        array is zeros.
    """
    seen = self.samples_seen
    n_features = len(X)

    # Statistics are not meaningful yet: every entry, bias included, is zero.
    if not (seen > 1):
        return np.asarray([0.0] * (n_features + 1))

    nominal = self._nominal_attributes
    scaled = []
    for idx in range(n_features):
        value = 0.0
        if nominal is not None and idx not in nominal:
            attr_sum = self.sum_of_attribute_values[idx]
            attr_mean = attr_sum / seen
            attr_sd = compute_sd(self.sum_of_attribute_squares[idx],
                                 attr_sum, seen)
            if attr_sd > 0:
                value = float(X[idx] - attr_mean) / (3 * attr_sd)
        scaled.append(value)

    # Value to be multiplied with the constant factor
    scaled.append(1.0)
    return np.asarray(scaled)
def predict(self, X):
    """Predict the target value using the target mean or the perceptron.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        Samples for which we want to predict the target values.

    Returns
    -------
    numpy.ndarray
        Predicted target values, one per row of X. When the model has not
        seen any sample yet, every prediction is 0.0.
    """
    n_rows, _ = get_dimensions(X)

    if not (self.samples_seen > 0):
        # Model is empty: still answer with one default value per sample so
        # the output length always matches the number of input rows.
        return np.zeros(n_rows)

    predictions = []
    for i in range(n_rows):
        if self.leaf_prediction == _TARGET_MEAN:
            # Gets observed data statistics
            votes = self.get_votes_for_instance(X[i])
            if votes == {}:
                # Tree is empty, all target_values equal, default to zero
                # (float, to keep the result dtype consistent).
                predictions.append(0.0)
            else:
                number_of_samples_seen = votes[0]
                sum_of_values = votes[1]
                predictions.append(sum_of_values / number_of_samples_seen)
        elif self.leaf_prediction == _PERCEPTRON:
            if not (self.samples_seen > 1):
                # A single sample gives no spread to de-normalize with.
                predictions.append(0.0)
                continue

            perceptron_weights = self.get_weights_for_instance(X[i])
            if perceptron_weights is None:
                predictions.append(0.0)
                continue

            normalized_sample = self.normalize_sample(X[i])
            normalized_prediction = np.dot(perceptron_weights,
                                           normalized_sample)

            # De-normalize prediction: inverse of the (y - mean) / (3 * sd)
            # scaling applied to the targets.
            mean = self.sum_of_values / self.samples_seen
            sd = compute_sd(self.sum_of_squares, self.sum_of_values,
                            self.samples_seen)
            predictions.append(normalized_prediction * sd * 3 + mean)

    return np.asarray(predictions)
def normalize_target_value(self, y):
    """Scale a target value using the running target statistics.

    The target is centred on the running mean and divided by three times
    the running standard deviation, so targets have comparable influence
    during training.

    Parameters
    ----------
    y: float
        Target value.

    Returns
    -------
    float
        Normalized target value, or 0.0 when at most one sample has been
        seen or the standard deviation is not positive.
    """
    seen = self.samples_seen
    if not (seen > 1):
        return 0.0

    target_sum = self.sum_of_values
    target_mean = target_sum / seen
    target_sd = compute_sd(self.sum_of_squares, target_sum, seen)
    if not (target_sd > 0):
        return 0.0

    return float(y - target_mean) / (3 * target_sd)