def calculate(self, Y_hat, Y, average='micro', weights=None):
    """
    Compute the recall of predictions against targets.

    Both inputs are validated with `check_XY`, reduced to class indices
    by taking the argmax along axis 1, and scored with
    `metrics.recall_score`.

    Parameters
    ----------
    Y_hat : array-like
        Predicted scores, one row per sample. The argmax of each row is
        used as the predicted class.

    Y : array-like
        Targets in the same row-per-sample layout (presumably one-hot
        encoded). The argmax of each row is used as the true class.

    average : str, default='micro'
        Forwarded as `average` to `metrics.recall_score`.

    weights : array-like, default=None
        Forwarded as `sample_weight` to `metrics.recall_score`.

    Returns
    -------
    score
        Whatever `metrics.recall_score` returns for the given inputs.
    """
    predictions, targets = check_XY(X=Y_hat, Y=Y)
    predicted_labels = np.argmax(predictions, axis=1)
    true_labels = np.argmax(targets, axis=1)
    return metrics.recall_score(
        true_labels,
        predicted_labels,
        average=average,
        sample_weight=weights,
    )
def predict_proba(self, X, *args, **kwargs):
    """
    Predict class probabilities for each sample in `X`.

    Parameters
    ----------
    X : array-like, shape=(n_samples, n_features)
        Data to predict.

    *args, **kwargs
        Accepted but not used by this method.

    Returns
    -------
    proba : array-like, shape=(n_samples, n_classes)
        Class probabilities of input data, as returned by
        `self._forward`. The order of classes is in sorted
        ascending order.

    Raises
    ------
    RuntimeError
        If the model has not been fitted.

    ValueError
        If `X` does not have `self.n_features_` columns.
    """
    if not self._is_fitted():
        raise RuntimeError("Model is not fitted")

    X = check_XY(X=X)
    if X.shape[1] != self.n_features_:
        # A single formatted string: passing two strings to ValueError
        # makes the message render as a tuple.
        raise ValueError(
            "Model takes %d features as input but data has %d features"
            % (self.n_features_, X.shape[1]))

    if self.verbose > 0:
        print("Predicting %d samples." % X.shape[0])

    return self._forward(X)
def calculate(self, Y_hat, Y, average='macro', weights=None):
    """
    Compute the area under the ROC curve of predictions against targets.

    Inputs are validated with `check_XY`. Targets are reduced to class
    indices with an argmax along axis 1. Predictions are passed on as
    scores rather than hard labels, because ROC AUC is a ranking metric:
    collapsing the scores with argmax discards the ranking, and for
    multiclass targets leaves a 1-D vector where a
    (n_samples, n_classes) score matrix is expected.

    NOTE(review): assumes `metrics` is `sklearn.metrics`; confirm
    against the file's imports.

    Parameters
    ----------
    Y_hat : array-like, shape=(n_samples, n_classes)
        Predicted class scores, one column per class.

    Y : array-like, shape=(n_samples, n_classes)
        Targets (presumably one-hot encoded). The argmax of each row is
        used as the true class.

    average : str, default='macro'
        Forwarded as `average` to `metrics.roc_auc_score`.

    weights : array-like, default=None
        Forwarded as `sample_weight` to `metrics.roc_auc_score`.

    Returns
    -------
    score
        Whatever `metrics.roc_auc_score` returns for the given inputs.
    """
    Y_hat, Y = check_XY(X=Y_hat, Y=Y)
    Y_hat = np.asarray(Y_hat)
    Y = np.argmax(Y, axis=1)

    if Y_hat.shape[1] == 2:
        # Binary problem: the scorer wants the 1-D score of the
        # positive class (column 1).
        scores = Y_hat[:, 1]
    else:
        # Multiclass problem: the scorer wants the full score matrix.
        scores = Y_hat

    return metrics.roc_auc_score(
        Y,
        scores,
        average=average,
        multi_class=self.multi_class,
        sample_weight=weights,
    )
def fit(self, X, Y, weights=None):
    """
    Train the model on the given data and labels.

    Every estimator in `self.estimators_` is fitted on one batch of
    `bootstrap` samples drawn from a shuffled, repeating `BatchDataset`
    built over the training data.

    Parameters
    ----------
    X : array-like, shape=(n_samples, n_features)
        Training data.

    Y : array-like, shape=(n_samples,)
        Target labels as integers.

    weights : array-like, shape=(n_samples,), default=None
        Sample weights. If None, then samples are equally weighted.

    Returns
    -------
    self : Base
        Fitted estimator.
    """
    X, Y = check_XY(X=X, Y=Y)
    if self.n_classes_ is None:
        self.n_classes_ = len(set(decode(Y)))
    if self.n_features_ is None:
        self.n_features_ = X.shape[1]
    Y = one_hot(Y, cols=self.n_classes_)

    bootstrap = calculate_bootstrap(self.bootstrap_size, len(X))
    # Only used for the progress message below; batches handed to the
    # estimators are sized by `bootstrap`.
    batch_size = calculate_batch(self.batch_size, len(Y))
    ds = BatchDataset(
        X, Y, weights,
        seed=self.random_state).shuffle().repeat().batch(bootstrap)

    if not self.warm_start or not self._is_fitted():
        if self.verbose > 0:
            print("Initializing model")
        self._initialize()

    if self.verbose > 0:
        print("Training model for %d epochs" % self.max_iter,
              "on %d samples in batches of %d." %
              (X.shape[0], batch_size))
        print("Training model with %d estimators." % self.n)

    n_estimators = len(self.estimators_)
    if self.verbose == 1:
        estimators = trange(n_estimators)
    else:
        estimators = range(n_estimators)

    for e in estimators:
        if self.verbose == 1:
            estimators.set_description("Estimator %d" % (e + 1))
        elif self.verbose > 1:
            # Parenthesised: `"..." % e + 1` formats first and then
            # adds 1 to the resulting string, raising TypeError.
            print("Fitting estimator %d" % (e + 1))
        X_, Y_, weights_ = ds.next()
        # Reset the dataset's cursor after each draw.
        # NOTE(review): presumably so every estimator draws from the
        # start of a reshuffled pass; confirm against BatchDataset.
        ds.i = 0
        self.estimators_[e].fit(X_, Y_, weights=weights_)

    self.fitted_ = True
    if self.verbose > 0:
        print("Training complete.")
    return self
def fit(self, X, Y, weights=None):
    """
    Train the model on the given data and labels.

    The wrapped ensemble must already be fitted. This method stores the
    training data, fits the nearest-neighbour index `self.knn` on it,
    and records for every training sample whether each ensemble
    estimator classified it correctly.

    Parameters
    ----------
    X : array-like, shape=(n_samples, n_features)
        Training data.

    Y : array-like, shape=(n_samples,)
        Target labels as integers.

    weights : array-like, shape=(n_samples,), default=None
        Sample weights for ensemble. Not used by this method.

    Returns
    -------
    self : Base
        Fitted estimator.

    Raises
    ------
    ValueError
        If the ensemble is not fitted, or if `X` does not have the
        number of features the ensemble was trained on.
    """
    X, Y = check_XY(X=X, Y=Y)
    if not self.ensemble._is_fitted():
        raise ValueError("Ensemble must already be trained")
    if X.shape[1] != self.ensemble.n_features_:
        # A single formatted string: passing two strings to ValueError
        # makes the message render as a tuple.
        raise ValueError(
            "Ensemble accepts data with %d features "
            "but encountered data with %d features."
            % (self.ensemble.n_features_, X.shape[1]))

    if self.verbose > 0:
        print("Initializing and training model")
    if self.n_classes_ is None:
        self.n_classes_ = self.ensemble.n_classes_
    if self.n_features_ is None:
        self.n_features_ = self.ensemble.n_features_

    targets = Y.reshape(-1)
    self.data_, self.targets_ = X, targets

    if self.verbose > 2:
        print("Fitting Nearest Neighbors")
    self.knn.fit(X)

    if self.verbose > 2:
        print("Scoring ensemble")
    estimators = self.ensemble.estimators_
    # 1 where the estimator predicted the sample correctly, else 0.
    hits = [np.where(e.predict(X) == targets, 1, 0) for e in estimators]
    # Rebuilt from scratch on every call so a refit does not append to
    # (or fail on) the score matrix of a previous fit. Final layout is
    # (n_samples, n_estimators).
    # NOTE(review): float dtype assumes `scores_` used to start as an
    # empty float array (e.g. `np.array([])`), which promoted the
    # concatenated result to float. Confirm against `__init__`.
    self.scores_ = np.asarray(hits, dtype=float).reshape(
        len(estimators), len(X)).T

    self.fitted_ = True
    if self.verbose > 0:
        print("Training complete.")
    return self
def feature_importance(self, X, Y):
    """
    Calculate the feature importances by permuting each
    feature separately and measuring the increase in loss.

    For every feature, a copy of `X` has that column shuffled with
    `self.random_state`; the importance is
    `exp(loss on shuffled data) / exp(loss on original data)`.

    Parameters
    ----------
    X : array-like, shape=(n_samples, n_features)
        Training data.

    Y : array-like, shape=(n_samples,)
        Target labels as integers.

    Returns
    -------
    importances : list, shape=(n_features,)
        List of feature importances by error increase,
        in order of features as they appear in the data.
        The larger the error increase, the more important
        the feature.
        NOTE(review): each entry has whatever shape
        `self.loss.loss` returns; if that is element-wise rather than
        a scalar, the entries are arrays. Confirm.

    Raises
    ------
    RuntimeError
        If the model has not been fitted.

    ValueError
        If `X` does not have `self.n_features_` columns.
    """
    if not self._is_fitted():
        raise RuntimeError("Model is not fitted")

    X, Y = check_XY(X=X, Y=Y)
    Y = one_hot(Y, cols=self.n_classes_)
    if X.shape[1] != self.n_features_:
        # A single formatted string: passing two strings to ValueError
        # makes the message render as a tuple.
        raise ValueError(
            "Model takes %d features as input but data has %d features"
            % (self.n_features_, X.shape[1]))

    if self.verbose > 0:
        print("Calculating feature importances")

    baseline = np.exp(self.loss.loss(self.predict_proba(X), Y))
    importances = []
    for f in range(X.shape[1]):
        # Work on a copy so the caller's data keeps its column order.
        X_ = copy(X)
        # Shuffles column `f` of the copy in place.
        self.random_state.shuffle(X_[:, f])
        loss_ = self.loss.loss(self.predict_proba(X_), Y)
        importances.append(np.exp(loss_) / baseline)
    return importances
def predict_proba(self, X):
    """
    Predict class probabilities for each sample in `X`.

    For every sample, the nearest training neighbours are looked up
    with `self.knn`, a competence score per ensemble estimator is
    derived from them, and the estimators chosen by `self._select`
    are queried. Their outputs are summed (scaled by competence when
    `self.rank` is truthy) and divided by the number of selected
    estimators.

    Parameters
    ----------
    X : array-like, shape=(n_samples, n_features)
        Data to predict.

    Returns
    -------
    proba : array-like, shape=(n_samples, n_classes)
        Class probabilities of input data.
        The order of classes is in sorted
        ascending order.

    Raises
    ------
    RuntimeError
        If the model has not been fitted.

    ValueError
        If `X` does not have `self.n_features_` columns.
    """
    if not self._is_fitted():
        raise RuntimeError("Model is not fitted")

    X = check_XY(X=X)
    if X.shape[1] != self.n_features_:
        # A single formatted string: passing two strings to ValueError
        # makes the message render as a tuple.
        raise ValueError(
            "Model takes %d features as input but data has %d features"
            % (self.n_features_, X.shape[1]))

    if self.verbose > 0:
        print("Predicting %d samples." % X.shape[0])

    # Only the neighbour indices are needed; distances are discarded.
    _, neighbors = self.knn.kneighbors(X)
    # Indexed below as [sample, estimator].
    competence = self._calculate_competence(neighbors)
    # Indexed below as [sample, slot]; each value is an estimator index.
    selected = self._select(competence)
    estimators = self.ensemble.estimators_
    n_selected = selected.shape[1]

    pred = np.zeros((len(X), self.n_classes_))
    for n in range(len(X)):
        sample = X[n].reshape(1, -1)
        for idx in selected[n]:
            p = estimators[idx].predict_proba(sample)
            if self.rank:
                # Out-of-place so the array returned by the estimator
                # is never modified.
                p = p * competence[n, idx]
            pred[n] += p.reshape(-1)
    return pred / n_selected
def calculate(self, Y_hat, Y, weights=None):
    """
    Compute the accuracy of predictions against targets.

    Both inputs are validated with `check_XY`, reduced to class indices
    by taking the argmax along axis 1, and scored with
    `metrics.accuracy_score`.

    Parameters
    ----------
    Y_hat : array-like
        Predicted scores, one row per sample. The argmax of each row is
        used as the predicted class.

    Y : array-like
        Targets in the same row-per-sample layout (presumably one-hot
        encoded). The argmax of each row is used as the true class.

    weights : array-like, default=None
        Forwarded as `sample_weight` to `metrics.accuracy_score`.

    Returns
    -------
    score
        Whatever `metrics.accuracy_score` returns for the given inputs.
    """
    predictions, targets = check_XY(X=Y_hat, Y=Y)
    predicted_labels = np.argmax(predictions, axis=1)
    true_labels = np.argmax(targets, axis=1)
    return metrics.accuracy_score(
        true_labels,
        predicted_labels,
        sample_weight=weights,
    )
def fit(self, X, Y, *args, weights=None, **kwargs):
    """
    Train the model on the given data and labels.

    Runs up to `self.max_iter` epochs of mini-batch training: each
    batch is passed through `self._forward`, scored, and then used for
    an update via `self._backward`. Training stops early when the
    dataset yields an empty batch or, if `self.tol` is set, when the
    loss changes by less than `self.tol` between consecutive batches.

    Parameters
    ----------
    X : array-like, shape=(n_samples, n_features)
        Training data.

    Y : array-like, shape=(n_samples,)
        Target labels as integers.

    *args, **kwargs
        Accepted but not used by this method.

    weights : array-like, shape=(n_samples,), default=None
        Sample weights. If None, then samples are equally weighted.

    Returns
    -------
    self : Base
        Fitted estimator.
    """
    X, Y = check_XY(X=X, Y=Y)
    if self.n_classes_ is None:
        self.n_classes_ = len(set(decode(Y)))
    if self.n_features_ is None:
        self.n_features_ = X.shape[1]
    Y = one_hot(Y, cols=self.n_classes_)

    batch_size = calculate_batch(self.batch_size, len(Y))
    ds = BatchDataset(
        X, Y, weights,
        seed=self.random_state).shuffle().repeat().batch(batch_size)

    if not self.warm_start or not self._is_fitted():
        if self.verbose > 0:
            print("Initializing model")
        self._initialize()

    if self.verbose > 0:
        print("Training model for %d epochs" % self.max_iter,
              "on %d samples in batches of %d." %
              (X.shape[0], batch_size))

    loss_prev, early_stop = np.inf, False
    if self.verbose == 1:
        epochs = trange(self.max_iter)
    else:
        epochs = range(self.max_iter)

    for e in epochs:
        if self.verbose == 2:
            batches = trange(ds.n_batches)
        else:
            batches = range(ds.n_batches)
        if self.verbose > 2:
            # 1-based, matching the per-batch message below.
            print("Epoch %d" % (e + 1))

        for b in batches:
            # Named apart from the `weights` parameter so the caller's
            # argument is not overwritten inside the loop.
            X_batch, Y_batch, batch_weights = ds.next()
            if len(X_batch) == 0:
                if self.verbose > 0:
                    print("No more data to train. Ending training.")
                early_stop = True
                break

            Y_hat = self._forward(X_batch)
            # Sum the loss over axis 1, then average over the batch.
            loss = np.mean(np.sum(self.loss.loss(Y_hat, Y_batch), axis=1))
            metric = self.score(Y_batch, Y_hat=Y_hat, weights=batch_weights)
            msg = ('loss: %.4f' % loss + ', ' + self.metric.name
                   + ': %.4f' % metric)
            if self.verbose == 1:
                epochs.set_description(msg)
            elif self.verbose == 2:
                batches.set_description(msg)
            elif self.verbose > 2:
                print("Epoch %d, Batch %d completed." % (e + 1, b + 1), msg)

            # The convergence check runs before the update, so the
            # batch that triggers it is not back-propagated.
            if self.tol is not None and np.abs(loss - loss_prev) < self.tol:
                early_stop = True
                break

            self._backward(Y_hat, Y_batch, weights=batch_weights)
            loss_prev = loss

        if early_stop:
            break

    self.fitted_ = True
    if self.verbose > 0:
        print("Training complete.")
    return self