def test_unique_labels_mixed_types():
    # Mix of multilabel-indicator and multilabel-sequences
    mix_multilabel_format = product(EXAMPLES["multilabel-indicator"],
                                    EXAMPLES["multilabel-sequences"])
    for y_multilabel, y_multiclass in mix_multilabel_format:
        assert_raises(ValueError, unique_labels, y_multiclass, y_multilabel)
        assert_raises(ValueError, unique_labels, y_multilabel, y_multiclass)

    # Mix with binary or multiclass and multilabel
    mix_clf_format = product(EXAMPLES["multilabel-indicator"] +
                             EXAMPLES["multilabel-sequences"],
                             EXAMPLES["multiclass"] +
                             EXAMPLES["binary"])
    for y_multilabel, y_multiclass in mix_clf_format:
        assert_raises(ValueError, unique_labels, y_multiclass, y_multilabel)
        assert_raises(ValueError, unique_labels, y_multilabel, y_multiclass)

    # Mix string and number input type
    assert_raises(ValueError, unique_labels, [[1, 2], [3]], [["a", "d"]])
    assert_raises(ValueError, unique_labels, ["1", 2])
    assert_raises(ValueError, unique_labels, [["1", 2], [3]])
    assert_raises(ValueError, unique_labels, [["1", "2"], [3]])

    assert_array_equal(unique_labels([(2,), (0, 2,)], [(), ()]), [0, 2])
    assert_array_equal(unique_labels([("2",), ("0", "2",)], [(), ()]),
                       ["0", "2"])
def test_unique_labels():
    # Empty iterable
    assert_raises(ValueError, unique_labels)

    # Multiclass problem
    assert_array_equal(unique_labels(range(10)), np.arange(10))
    assert_array_equal(unique_labels(np.arange(10)), np.arange(10))
    assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4]))

    # Multilabels
    assert_array_equal(
        assert_warns(DeprecationWarning, unique_labels,
                     [(0, 1, 2), (0,), tuple(), (2, 1)]),
        np.arange(3))
    assert_array_equal(
        assert_warns(DeprecationWarning, unique_labels,
                     [[0, 1, 2], [0], list(), [2, 1]]),
        np.arange(3))
    assert_array_equal(unique_labels(np.array([[0, 0, 1],
                                               [1, 0, 1],
                                               [0, 0, 0]])),
                       np.arange(3))
    assert_array_equal(unique_labels(np.array([[0, 0, 1],
                                               [0, 0, 0]])),
                       np.arange(3))

    # Several arrays passed
    assert_array_equal(unique_labels([4, 0, 2], range(5)), np.arange(5))
    assert_array_equal(unique_labels((0, 1, 2), (0,), (2, 1)), np.arange(3))

    # Border line case with binary indicator matrix
    assert_raises(ValueError, unique_labels, [4, 0, 2], np.ones((5, 5)))
    assert_raises(ValueError, unique_labels, np.ones((5, 4)), np.ones((5, 5)))
    assert_array_equal(unique_labels(np.ones((4, 5)), np.ones((5, 5))),
                       np.arange(5))

    # Some tests with strings input
    assert_array_equal(unique_labels(["a", "b", "c"], ["d"]),
                       ["a", "b", "c", "d"])
    assert_array_equal(
        assert_warns(DeprecationWarning, unique_labels,
                     [["a", "b"], ["c"]], [["d"]]),
        ["a", "b", "c", "d"])
def test_unique_labels():
    # Empty iterable
    assert_raises(ValueError, unique_labels)

    # Multiclass problem
    assert_array_equal(unique_labels(range(10)), np.arange(10))
    assert_array_equal(unique_labels(np.arange(10)), np.arange(10))
    assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4]))

    # Multilabel indicator
    assert_array_equal(unique_labels(np.array([[0, 0, 1],
                                               [1, 0, 1],
                                               [0, 0, 0]])),
                       np.arange(3))
    assert_array_equal(unique_labels(np.array([[0, 0, 1],
                                               [0, 0, 0]])),
                       np.arange(3))

    # Several arrays passed
    assert_array_equal(unique_labels([4, 0, 2], range(5)), np.arange(5))
    assert_array_equal(unique_labels((0, 1, 2), (0,), (2, 1)), np.arange(3))

    # Border line case with binary indicator matrix
    assert_raises(ValueError, unique_labels, [4, 0, 2], np.ones((5, 5)))
    assert_raises(ValueError, unique_labels, np.ones((5, 4)), np.ones((5, 5)))
    assert_array_equal(unique_labels(np.ones((4, 5)), np.ones((5, 5))),
                       np.arange(5))
def test_unique_labels_non_specific():
    """Test unique_labels with a variety of collected examples"""
    # Smoke test for all supported formats
    for format in ["binary", "multiclass", "multilabel-sequences",
                   "multilabel-indicator"]:
        for y in EXAMPLES[format]:
            unique_labels(y)

    # We don't support these formats at the moment
    for example in NON_ARRAY_LIKE_EXAMPLES:
        assert_raises(ValueError, unique_labels, example)

    for y_type in ["unknown", "continuous", "continuous-multioutput",
                   "multiclass-multioutput"]:
        for example in EXAMPLES[y_type]:
            assert_raises(ValueError, unique_labels, example)
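# Hedged illustration (not part of the original test file): a minimal sketch of
# the unique_labels behaviour exercised above -- labels from all argument arrays
# are merged and returned in sorted order, and mixing string and numeric label
# types raises a ValueError. The literal values below are made up for the demo.
from sklearn.utils.multiclass import unique_labels

print(unique_labels([3, 5, 5, 7], [0, 7]))    # [0 3 5 7]
print(unique_labels(["b", "a"], ["c"]))       # ['a' 'b' 'c']
try:
    unique_labels([1, 2], ["a"])              # mixed input types are rejected
except ValueError as err:
    print(err)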
def run_intent_evaluation(config, model_path, component_builder=None): from sklearn.metrics import accuracy_score from sklearn.metrics import classification_report from sklearn.metrics import confusion_matrix from sklearn.metrics import f1_score from sklearn.metrics import precision_score from sklearn.utils.multiclass import unique_labels # get the metadata config from the package data test_data = load_data(config['data']) metadata = Metadata.load(model_path) interpreter = Interpreter.load(metadata, config, component_builder) test_y = [e.get("intent") for e in test_data.training_examples] preds = [] for e in test_data.training_examples: res = interpreter.parse(e.text) if res.get('intent'): preds.append(res['intent'].get('name')) else: preds.append(None) logger.info("Intent Evaluation Results") logger.info("F1-Score: {}".format(f1_score(test_y, preds, average='weighted'))) logger.info("Precision: {}".format(precision_score(test_y, preds, average='weighted'))) logger.info("Accuracy: {}".format(accuracy_score(test_y, preds))) logger.info("Classification report: \n{}".format(classification_report(test_y, preds))) cnf_matrix = confusion_matrix(test_y, preds) plot_intent_confusion_matrix(cnf_matrix, classes=unique_labels(test_y, preds), title='Intent Confusion matrix') plt.show() return
def validate(self, model, test_x, test_y): pred_test_y = model.predict(test_x) cr = classification_report(test_y, pred_test_y, output_dict=True) cm = confusion_matrix(test_y, pred_test_y) validation_metrics = OrderedDict() for metric in self.metrics: v = metric(test_y, pred_test_y) validation_metrics[metric.name] = v labs = unique_labels(test_y) report = "\n" report += "\tClassificationReport for `{}`\n".format(model.name) report += "\n" report += pretty_df("Report", pd.DataFrame(cr).transpose()) report += "\n\n" report += pretty_table( "Metric results", list(validation_metrics.values()), ["Metric"], validation_metrics.keys() ) report += "\n\n" report += pretty_table("Confusion matrix", cm, labs, labs) logger.info("VClassificationReport: \n{}".format(report)) return ModelInstance.apply_config( model, validation_metrics=validation_metrics )
def confusion_matrix_instances(y_true, y_pred, labels=None):
    y_type, y_true, y_pred = _check_clf_targets(y_true, y_pred)
    if y_type not in ("binary", "multiclass"):
        raise ValueError("%s is not supported" % y_type)

    if labels is None:
        labels = unique_labels(y_true, y_pred)
    else:
        labels = np.asarray(labels)

    n_labels = labels.size
    label_to_ind = {label: ind for ind, label in enumerate(labels)}

    # convert yt, yp into index
    y_pred = np.array([label_to_ind.get(x, n_labels + 1) for x in y_pred])
    y_true = np.array([label_to_ind.get(x, n_labels + 1) for x in y_true])

    # intersect y_pred, y_true with labels, eliminate items not in labels
    ind = np.logical_and(y_pred < n_labels, y_true < n_labels)
    y_pred = y_pred[ind]
    y_true = y_true[ind]

    # np.bool is removed in recent NumPy releases; the builtin bool works here
    CM = np.zeros((n_labels, n_labels, y_true.shape[0]), dtype=bool)
    CM[y_true, y_pred, np.arange(y_true.shape[0])] = True
    return CM
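# Hedged usage sketch for the helper above: the boolean
# (n_labels, n_labels, n_samples) tensor can be collapsed into an ordinary
# confusion matrix by summing over the sample axis. The label vectors below are
# illustrative only, and the call assumes confusion_matrix_instances as defined
# above is available in this environment.
y_true = ["cat", "dog", "dog", "cat", "bird"]
y_pred = ["cat", "dog", "cat", "cat", "bird"]

per_instance = confusion_matrix_instances(y_true, y_pred)
cm_counts = per_instance.sum(axis=2)   # standard confusion-matrix counts
print(cm_counts)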
def fit(self, X, y): if self.activation is None: # Useful to quantify the impact of the non-linearity self._activate = lambda x: x else: self._activate = self.activations[self.activation] rng = check_random_state(self.random_state) # one-of-K coding for output values self.classes_ = unique_labels(y) Y = label_binarize(y, self.classes_) # set hidden layer parameters randomly n_features = X.shape[1] if self.rank is None: if self.density == 1: self.weights_ = rng.randn(n_features, self.n_hidden) else: self.weights_ = sparse_random_matrix( self.n_hidden, n_features, density=self.density, random_state=rng).T else: # Low rank weight matrix self.weights_u_ = rng.randn(n_features, self.rank) self.weights_v_ = rng.randn(self.rank, self.n_hidden) self.biases_ = rng.randn(self.n_hidden) # map the input data through the hidden layer H = self.transform(X) # fit the linear model on the hidden layer activation self.beta_ = np.dot(pinv2(H), Y) return self
def run_intent_evaluation(config, model_path, component_builder=None):
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels

    # get the metadata config from the package data
    test_data = load_data(config['data'])
    interpreter = Interpreter.load(model_path, config, component_builder)

    test_y = [e.get("intent") for e in test_data.training_examples]

    preds = []
    for e in test_data.training_examples:
        res = interpreter.parse(e.text)
        if res.get('intent'):
            preds.append(res['intent'].get('name'))
        else:
            preds.append(None)

    log_evaluation_table(test_y, preds)

    cnf_matrix = confusion_matrix(test_y, preds)
    plot_confusion_matrix(cnf_matrix, classes=unique_labels(test_y, preds),
                          title='Intent Confusion matrix')

    plt.show()
    return
def test_losses(): """Test loss functions""" y_true, y_pred, _ = make_prediction(binary=True) n_samples = y_true.shape[0] n_classes = np.size(unique_labels(y_true)) # Classification # -------------- with warnings.catch_warnings(True): # Throw deprecated warning assert_equal(zero_one(y_true, y_pred), 13) assert_almost_equal(zero_one(y_true, y_pred, normalize=True), 13 / float(n_samples), 2) assert_almost_equal(zero_one_loss(y_true, y_pred), 13 / float(n_samples), 2) assert_equal(zero_one_loss(y_true, y_pred, normalize=False), 13) assert_almost_equal(zero_one_loss(y_true, y_true), 0.0, 2) assert_almost_equal(zero_one_loss(y_true, y_true, normalize=False), 0, 2) assert_almost_equal(hamming_loss(y_true, y_pred), 2 * 13. / (n_samples * n_classes), 2) assert_equal(accuracy_score(y_true, y_pred), 1 - zero_one_loss(y_true, y_pred)) assert_equal(accuracy_score(y_true, y_pred, normalize=False), n_samples - zero_one_loss(y_true, y_pred, normalize=False)) with warnings.catch_warnings(True): # Throw deprecated warning assert_equal(zero_one_score(y_true, y_pred), 1 - zero_one_loss(y_true, y_pred)) # Regression # ---------- assert_almost_equal(mean_squared_error(y_true, y_pred), 12.999 / n_samples, 2) assert_almost_equal(mean_squared_error(y_true, y_true), 0.00, 2) # mean_absolute_error and mean_squared_error are equal because # it is a binary problem. assert_almost_equal(mean_absolute_error(y_true, y_pred), 12.999 / n_samples, 2) assert_almost_equal(mean_absolute_error(y_true, y_true), 0.00, 2) assert_almost_equal(explained_variance_score(y_true, y_pred), -0.04, 2) assert_almost_equal(explained_variance_score(y_true, y_true), 1.00, 2) assert_equal(explained_variance_score([0, 0, 0], [0, 1, 1]), 0.0) assert_almost_equal(r2_score(y_true, y_pred), -0.04, 2) assert_almost_equal(r2_score(y_true, y_true), 1.00, 2) assert_equal(r2_score([0, 0, 0], [0, 0, 0]), 1.0) assert_equal(r2_score([0, 0, 0], [0, 1, 1]), 0.0)
def fit(self, X, y): # Check data X, y = np.array(X), np.array(y) X, y = check_X_y(X, y) # Split to grow cascade and validate mask = np.random.random(y.shape[0]) < self.validation_fraction X_tr, X_vl = X[mask], X[~mask] y_tr, y_vl = y[mask], y[~mask] self.classes_ = unique_labels(y) self.layers_, inp_tr, inp_vl = [], X_tr, X_vl self.scores_ = [] # First layer forests = [RandomForestClassifier(max_features=1, n_estimators=self.n_estimators, min_samples_split=10, criterion='gini', n_jobs=-1), # Complete random RandomForestClassifier(max_features=1, n_estimators=self.n_estimators, min_samples_split=10, criterion='gini', n_jobs=-1), # Complete random RandomForestClassifier(n_estimators=self.n_estimators, n_jobs=-1), RandomForestClassifier(n_estimators=self.n_estimators, n_jobs=-1)] _ = [f.fit(inp_tr, y_tr) for f in forests] p_vl = [f.predict_proba(inp_vl) for f in forests] labels = [self.classes_[i] for i in np.argmax(np.array(p_vl).mean(axis=0), axis=1)] score = self.scoring(y_vl, labels) self.layers_.append(forests) self.scores_.append(score) p_tr = [cross_val_predict(f, inp_tr, y_tr, cv=self.cv, method='predict_proba') for f in forests] # Fit other layers last_score = score inp_tr, inp_vl = np.concatenate([X_tr]+p_tr, axis=1), np.concatenate([X_vl]+p_vl, axis=1) while True: # Grow cascade forests = [RandomForestClassifier(max_features=1, n_estimators=self.n_estimators, min_samples_split=10, criterion='gini', n_jobs=-1), # Complete random RandomForestClassifier(max_features=1, n_estimators=self.n_estimators, min_samples_split=10, criterion='gini', n_jobs=-1), # Complete random RandomForestClassifier(n_estimators=self.n_estimators, n_jobs=-1), RandomForestClassifier(n_estimators=self.n_estimators, n_jobs=-1)] _ = [forest.fit(inp_tr, y_tr) for forest in forests] # Fit the forest p_vl = [forest.predict_proba(inp_vl) for forest in forests] labels = [self.classes_[i] for i in np.argmax(np.array(p_vl).mean(axis=0), axis=1)] score = self.scoring(y_vl, labels) if score - last_score > self.tolerance: self.layers_.append(forests) p_tr = [cross_val_predict(f, inp_tr, y_tr, cv=self.cv, method='predict_proba') for f in forests] inp_tr, inp_vl = np.concatenate([X_tr]+p_tr, axis=1), np.concatenate([X_vl]+p_vl, axis=1) self.scores_.append(score) last_score = score print(self.scores_) else: break # Retrain on entire dataset inp_ = X for forests in self.layers_: _ = [f.fit(inp_, y) for f in forests] p = [cross_val_predict(f, inp_, y, cv=self.cv, method='predict_proba') for f in forests] inp_ = np.concatenate([X]+p, axis=1) return self
def fit(self, X, y):
    X, y = check_X_y(X, y)
    self.classes_ = unique_labels(y)
    self.X_ = DynamicBayesianClassifier._first_col(X)
    self.y_ = y
    self.size_ = self.X_.size
    for i in range(self.X_.size):
        if y[i] not in self.dbayesmode_major_.keys():
            self.dbayesmode_major_[y[i]] = scalgoutil.DBayesMode(y[i])
        self.dbayesmode_major_[y[i]].update(self.X_[i])
        self.update_priors()
    return self
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data
    classes = classes[unique_labels(y_true, y_pred)]
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax
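# Hedged usage sketch for the plotting helper above: `classes` must be an array
# that can be indexed with the integer labels returned by unique_labels, so the
# lookup `classes[unique_labels(y_true, y_pred)]` works as intended. The data
# below is illustrative only.
import numpy as np
import matplotlib.pyplot as plt

y_true = np.array([0, 1, 2, 2, 1, 0])
y_pred = np.array([0, 2, 2, 2, 0, 0])
class_names = np.array(["setosa", "versicolor", "virginica"])

plot_confusion_matrix(y_true, y_pred, classes=class_names, normalize=True)
plt.show()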
def score(self, X, y, **kwargs): """ Generates a 2D array where each row is the count of the predicted classes and each column is the true class Parameters ---------- X : ndarray or DataFrame of shape n x m A matrix of n instances with m features y : ndarray or Series of length n An array or series of target or class values Returns ------- score_ : float Global accuracy score """ # We're relying on predict to raise NotFitted y_pred = self.predict(X) y_type, y_true, y_pred = _check_targets(y, y_pred) if y_type not in ("binary", "multiclass"): raise YellowbrickValueError("%s is not supported" % y_type) indices = unique_labels(y_true, y_pred) if len(self.classes_) > len(indices): raise ModelError("y and y_pred contain zero values " "for one of the specified classes") elif len(self.classes_) < len(indices): raise NotImplementedError("filtering classes is " "currently not supported") # Create a table of predictions whose rows are the true classes # and whose columns are the predicted classes; each element # is the count of predictions for that class that match the true # value of that class. self.predictions_ = np.array([ [ (y_pred[y == label_t] == label_p).sum() for label_p in indices ] for label_t in indices ]) self.draw() self.score_ = self.estimator.score(X, y) return self.score_
def plot_story_evaluation(test_y, preds, out_file):
    """Plot the results of the story evaluation."""
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels
    import matplotlib.pyplot as plt

    log_evaluation_table(test_y, preds)
    cnf_matrix = confusion_matrix(test_y, preds)
    plot_confusion_matrix(cnf_matrix, classes=unique_labels(test_y, preds),
                          title='Action Confusion matrix')

    fig = plt.gcf()
    fig.set_size_inches(int(20), int(20))
    fig.savefig(out_file, bbox_inches='tight')
def evaluate( X_train, X_test, y_train, y_test, templates_env, store_metadata, n_folds, n_jobs, paper, pool, ): pipeline = Pipeline( [ ('svd', TruncatedSVD(n_components=50)), ('nn', KNeighborsClassifier()), ] ) logger.info('Training.') pipeline.fit(X_train, y_train) logger.info('Predicting %d labels.', X_test.shape[0]) y_predicted = pipeline.predict(X_test) prfs = precision_recall_fscore_support(y_test, y_predicted) util.display( templates_env.get_template('classification_report.rst').render( argv=' '.join(sys.argv) if not util.inside_ipython() else 'ipython', paper=paper, clf=pipeline, tprfs=zip(unique_labels(y_test, y_predicted), *prfs), p_avg=np.average(prfs[0], weights=prfs[3]), r_avg=np.average(prfs[1], weights=prfs[3]), f_avg=np.average(prfs[2], weights=prfs[3]), s_sum=np.sum(prfs[3]), store_metadata=store_metadata, accuracy=accuracy_score(y_test, y_predicted), ) ) pd.DataFrame(y_predicted).to_csv('out.csv') pd.DataFrame(y_test).to_csv('y_test.csv')
def do_full_svm(self):
    self.accuracies = [0] * self.iterations
    this_round = []
    for i in range(self.iterations):
        self.clf = LinearSVC()
        self.clf.fit(self.x_train_arr[i], self.y_train_arr[i])
        svm_prediction = self.clf.predict(self.x_test_arr[i])
        this_round.append(
            metrics.precision_recall_fscore_support(y_true=self.y_test_arr[i],
                                                    y_pred=svm_prediction)
        )
        self.accuracies[i] = metrics.accuracy_score(self.y_test_arr[i],
                                                    svm_prediction)
        self.labels = unique_labels(self.y_test_arr[i], svm_prediction)
        print(".", end="")
    self.performance = np.mean(this_round, axis=0)
    print("")
def evaluate_intents(intent_results, errors_filename, confmat_filename, intent_hist_filename, ): # pragma: no cover """Creates a confusion matrix and summary statistics for intent predictions. Log samples which could not be classified correctly and save them to file. Creates a confidence histogram which is saved to file. Only considers those examples with a set intent. Others are filtered out.""" from sklearn.metrics import confusion_matrix from sklearn.utils.multiclass import unique_labels import matplotlib.pyplot as plt # remove empty intent targets num_examples = len(intent_results) intent_results = remove_empty_intent_examples(intent_results) logger.info("Intent Evaluation: Only considering those " "{} examples that have a defined intent out " "of {} examples".format(len(intent_results), num_examples)) targets, predictions = _targets_predictions_from(intent_results) log_evaluation_table(targets, predictions) # log and save misclassified samples to file for debugging errors = collect_nlu_errors(intent_results) if errors: save_nlu_errors(errors, errors_filename) cnf_matrix = confusion_matrix(targets, predictions) labels = unique_labels(targets, predictions) plot_confusion_matrix(cnf_matrix, classes=labels, title='Intent Confusion matrix', out=confmat_filename) plt.show() plot_intent_confidences(intent_results, intent_hist_filename) plt.show()
def evaluate_intents(targets, predictions): # pragma: no cover """Creates a confusion matrix and summary statistics for intent predictions. Only considers those examples with a set intent. Others are filtered out. """ from sklearn.metrics import confusion_matrix from sklearn.utils.multiclass import unique_labels import matplotlib.pyplot as plt # remove empty intent targets num_examples = len(targets) targets, predictions = remove_empty_intent_examples(targets, predictions) logger.info("Intent Evaluation: Only considering those {} examples that " "have a defined intent out of {} examples".format(targets.size, num_examples)) log_evaluation_table(targets, predictions) cnf_matrix = confusion_matrix(targets, predictions) plot_confusion_matrix(cnf_matrix, classes=unique_labels(targets, predictions), title='Intent Confusion matrix') plt.show()
def test_unique_labels():
    # Empty iterable
    assert_raises(ValueError, unique_labels)

    # Multiclass problem
    assert_array_equal(unique_labels(range(10)), np.arange(10))
    assert_array_equal(unique_labels(np.arange(10)), np.arange(10))
    assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4]))

    # Multilabels
    assert_array_equal(unique_labels([(0, 1, 2), (0,), tuple(), (2, 1)]),
                       np.arange(3))
    assert_array_equal(unique_labels([[0, 1, 2], [0], list(), [2, 1]]),
                       np.arange(3))
    assert_array_equal(unique_labels(np.array([[0, 0, 1],
                                               [1, 0, 1],
                                               [0, 0, 0]])),
                       np.arange(3))

    # Several arrays passed
    assert_array_equal(unique_labels([4, 0, 2], range(5)), np.arange(5))
    assert_array_equal(unique_labels((0, 1, 2), (0,), (2, 1)), np.arange(3))
def fit(self, X, y):
    """A reference implementation of a fitting function for a classifier.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        The training input samples.
    y : array-like, shape = [n_samples]
        The target values. An array of int.

    Returns
    -------
    self : object
        Returns self.
    """
    # Check that X and y have correct shape
    X, y = check_X_y(X, y)
    # Store the classes seen during fit
    self.classes_ = unique_labels(y)

    self.X_ = X
    self.y_ = y
    # Return the classifier
    return self
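# Hedged companion sketch (not from the original source): a minimal predict()
# that pairs with the reference fit() above, assigning each sample the class of
# its nearest stored training example. This follows the scikit-learn developer
# template; the nearest-neighbour rule is an assumption here, not the author's.
import numpy as np
from sklearn.utils.validation import check_array, check_is_fitted
from sklearn.metrics.pairwise import euclidean_distances

def predict(self, X):
    # Check that fit has been called and validate the input
    check_is_fitted(self, ['X_', 'y_'])
    X = check_array(X)

    # Index of the closest stored training sample for each row of X
    closest = np.argmin(euclidean_distances(X, self.X_), axis=1)
    return self.y_[closest]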
def fit(self, X, y): """Fits the classifier Parameters ---------- X : array-like, shape = [n_samples, n_features] The training input samples. y : array-like, shape = [n_samples] The target values. An array of int. Returns ------- self : object Returns self. """ # Check that X and y have correct shape self._validate_x(X) y = self._validate_y(y) # Store the classes seen during fit self.classes_ = unique_labels(y) self.num_classes_ = len(self.classes_) self.class_labels_ = [ '__label__{}'.format(lbl) for lbl in self.classes_] # Dump training set to a fasttext-compatible file temp_trainset_fpath = temp_dataset_fpath() input_col = self._input_col(X) dump_xy_to_fasttext_format(input_col, y, temp_trainset_fpath) # train self.model = train_supervised( input=temp_trainset_fpath, **self.kwargs) # Return the classifier try: os.remove(temp_trainset_fpath) except FileNotFoundError: # pragma: no cover pass return self
def fit(self, X, y):
    """
    Fit the VTT classifier model

    Args:
        X (sparse matrix, shape = [n_samples, n_features]) : Training data
        y (array-like, shape = [n_samples]) : Target values
    """
    self.classes_ = unique_labels(y)
    X = csr_matrix(X, dtype=bool)  # .tocsr()
    pvals = X[np.array(y == 1), :].mean(axis=0)
    nvals = X[np.array(y != 1), :].mean(axis=0)
    self.coef_ = self.__get_vtt_angles(pvals, nvals).toarray()
    pnvals = (nvals + pvals).T
    # this if is necessary in case the bias is being set by GridSearchCV
    if self.intercept_ is None:
        self.intercept_ = -(self.coef_.dot(pnvals) / 2.0)[0, 0]
    for b, val in self.B.items():
        self.coef_[0, b] = 1. / val
    return self
def _init_classes(self, y):
    """Map all possible classes to the range [0,..,C-1]

    Parameters
    ----------
    y : list of arrays of int, each element has shape=[samples_i,]
        Labels of the samples for each subject

    Returns
    -------
    new_y : list of arrays of int, each element has shape=[samples_i,]
        Mapped labels of the samples for each subject

    Note
    ----
    The mapping of the classes is saved in the attribute classes_.
    """
    self.classes_ = unique_labels(utils.concatenate_list(y))
    new_y = [None] * len(y)
    for s in range(len(y)):
        new_y[s] = np.digitize(y[s], self.classes_) - 1
    return new_y
def classification_report(y_true, y_pred):
    '''
    Computes classification metrics
    :param y_true - original class label
    :param y_pred - predicted class label
    :return macro_f1_measure
    '''
    labels = unique_labels(y_true, y_pred)
    p, r, f1, s = precision_recall_fscore_support(y_true, y_pred,
                                                  labels=labels,
                                                  average=None)
    f1_macro = 0
    precision_macro = 0
    recall_macro = 0
    for i, label in enumerate(labels):
        f1_macro += f1[i]
        precision_macro += p[i]
        recall_macro += r[i]

    macro_f1_measure = f1_macro / labels.size
    return macro_f1_measure
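# Hedged usage sketch: the helper above returns only the macro-averaged F1.
# The label vectors below are made up for illustration.
y_true = ["spam", "ham", "ham", "spam", "ham"]
y_pred = ["spam", "ham", "spam", "spam", "ham"]
print(classification_report(y_true, y_pred))   # macro F1 over the two classes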
def plot_confusion_matrix(y_true, y_pred, classes, normalize=False, title=None, cmap=plt.cm.Blues, filename='confusion.png'): """ This function prints and plots the confusion matrix. Normalization can be applied by setting `normalize=True`. """ from sklearn.metrics import confusion_matrix, accuracy_score from sklearn.utils.multiclass import unique_labels np.set_printoptions(precision=2) print("starting plotting confusion", filename) acc = accuracy_score(y_true, y_pred) if not title: if normalize: title = 'Normalized Acc %.02f' % acc else: title = 'Acc %.02f' % acc # Compute confusion matrix cm = confusion_matrix(y_true, y_pred) # Only use the labels that appear in the data classes = [classes[n] for n in list(unique_labels(y_true, y_pred))] if normalize: cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] print("Normalized confusion matrix") else: print('Confusion matrix, without normalization') print(cm) fig, ax = plt.subplots(figsize=(20, 20)) im = ax.imshow(cm, interpolation='nearest', cmap=cmap) ax.figure.colorbar(im, ax=ax) # We want to show all ticks... ax.set( xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]), # ... and label them with the respective list entries xticklabels=classes, yticklabels=classes, title=title, ylabel='True label', xlabel='Predicted label') # Rotate the tick labels and set their alignment. plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor") # Loop over data dimensions and create text annotations. fmt = '.2f' if normalize else 'd' thresh = cm.max() / 2. for i in range(cm.shape[0]): for j in range(cm.shape[1]): ax.text(j, i, format(cm[i, j], fmt), ha="center", va="center", color="white" if cm[i, j] > thresh else "black") fig.tight_layout() plt.savefig(filename) plt.close() print("finished plotting confusion", filename) return cm
def classifier_evaluation(self, classifier=1, kfold=10): s = Classification_gender # this class x_train_list, x_test_list, y_train_list, y_test_list, target_values, featureNames= \ s.readAndSplitKFoldsData('self', kfold) accuracy = [] precision_list = [] recall_list = [] fscore_list = [] suport_list = [] labels_list = [] for x_train, y_train, x_test, y_test in zip(x_train_list, y_train_list, x_test_list, y_test_list): #print(x_test) x_train = np.array(x_train) y_train = np.array(y_train) x_test = np.array(x_test) y_test = np.array(y_test) #x_train = preprocessing.normalize(x_train, norm='l2') #x_test =preprocessing.normalize(x_test, norm='l2') #x_train = preprocessing.scale(x_train) #x_test = preprocessing.scale(x_test) #print(y_test) print(x_train[0:, 3:].shape) print(y_train.shape) print(x_test[0:, 3:].shape) #print(x_train[0:, 3:]) y_pred = Classifiers.run_classifier(self, x_train[0:, 3:], y_train, x_test[0:, 3:], classifier=classifier) t = 0 f = 0 for y, y_t in zip(y_pred, y_test): if (y == y_t): t += 1 # print(str(y) + " _ " + str(y_t)) else: f += 1 # print(str(y) + " _ " + str(y_t) + ' Error') print(classification_report(y_test, y_pred)) print("True: " + str(t) + " False: " + str(f)) print("accuracy: " + str(round((t / (t + f)), 3)) + "\n") accuracy.append(t / (t + f)) labels = unique_labels(y_test, y_pred) labels_list.append(labels) #print(labels) pr, rec, fs, sup = precision_recall_fscore_support(y_test, y_pred) precision_list.append(pr) recall_list.append(rec) fscore_list.append(fs) suport_list.append(sup) labels, precision, recall, fscore, support, avg_precision, avg_recall, avg_fscore, total_support = \ s.meanOfLists(self, labels_list, precision_list, recall_list, fscore_list, suport_list) #labels = ["1_aspr", "2_stol", "3_apal_p", "4_apal_x", "5_elafr", "6_kokkin", "7_oplism", "8_malak", "9_geros", "10_pist" ] labels = vs_gender.VectorSpace_gender.genderValues print('%-14s%-14s%-14s%-14s%-14s' % ("Gender", "Precision", "Recall", "F1-score", "Support")) #labels = labels_list[0] for l, p, r, f, s in zip(labels, precision, recall, fscore, support): tuple = (l, round(p, 3), round(r, 3), round(f, 3), int(round(s))) print('%-14s%-14s%-14s%-14s%-14s' % tuple) tuple = ('\nAvg/Total', round(avg_precision, 3), round(avg_recall, 3), round(avg_fscore, 3), int(round(total_support))) print('%-14s%-14s%-14s%-14s%-14s' % tuple) print("avg accuracy: " + str(round(np.mean(accuracy), 3))) print(featureNames[4:]) return avg_fscore
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()

    ## Bug ??? - without this it does not plot correctly
    ax.margins(x=0, y=0)
    ##############

    return ax

# %%
from sklearn.utils.multiclass import unique_labels

plot_confusion_matrix(matrix_y_test.astype(int),
                      (matrix_ye_test >= thresholds[index_max]).astype(int),
                      unique_labels([0, 1]));

# %% [markdown]
# # Part 2 - Multi-class classification
#
# ## Item a
#
# Technique adopted: **One vs. all**
#
# The idea here is to build 5 classifiers, one for each class.

# %%
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import csv
def plot_confusion_matrix(y_true, y_pred, labels=None, true_labels=None, pred_labels=None, title=None, normalize=False, hide_zeros=False, hide_counts=False, x_tick_rotation=0, ax=None, figsize=None, cmap='Blues', title_fontsize="large", text_fontsize="medium"): """Generates confusion matrix plot from predictions and true labels Args: y_true (array-like, shape (n_samples)): Ground truth (correct) target values. y_pred (array-like, shape (n_samples)): Estimated targets as returned by a classifier. labels (array-like, shape (n_classes), optional): List of labels to index the matrix. This may be used to reorder or select a subset of labels. If none is given, those that appear at least once in ``y_true`` or ``y_pred`` are used in sorted order. (new in v0.2.5) true_labels (array-like, optional): The true labels to display. If none is given, then all of the labels are used. pred_labels (array-like, optional): The predicted labels to display. If none is given, then all of the labels are used. title (string, optional): Title of the generated plot. Defaults to "Confusion Matrix" if `normalize` is True. Else, defaults to "Normalized Confusion Matrix. normalize (bool, optional): If True, normalizes the confusion matrix before plotting. Defaults to False. hide_zeros (bool, optional): If True, does not plot cells containing a value of zero. Defaults to False. hide_counts (bool, optional): If True, doe not overlay counts. Defaults to False. x_tick_rotation (int, optional): Rotates x-axis tick labels by the specified angle. This is useful in cases where there are numerous categories and the labels overlap each other. ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to plot the curve. If None, the plot is drawn on a new set of axes. figsize (2-tuple, optional): Tuple denoting figure size of the plot e.g. (6, 6). Defaults to ``None``. cmap (string or :class:`matplotlib.colors.Colormap` instance, optional): Colormap used for plotting the projection. View Matplotlib Colormap documentation for available options. https://matplotlib.org/users/colormaps.html title_fontsize (string or int, optional): Matplotlib-style fontsizes. Use e.g. "small", "medium", "large" or integer-values. Defaults to "large". text_fontsize (string or int, optional): Matplotlib-style fontsizes. Use e.g. "small", "medium", "large" or integer-values. Defaults to "medium". Returns: ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was drawn. Example: >>> import scikitplot as skplt >>> rf = RandomForestClassifier() >>> rf = rf.fit(X_train, y_train) >>> y_pred = rf.predict(X_test) >>> skplt.metrics.plot_confusion_matrix(y_test, y_pred, normalize=True) <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490> >>> plt.show() .. 
image:: _static/examples/plot_confusion_matrix.png :align: center :alt: Confusion matrix """ y_true = np.asarray(y_true) y_pred = np.asarray(y_pred) if ax is None: fig, ax = plt.subplots(1, 1, figsize=figsize) cm = confusion_matrix(y_true, y_pred, labels=labels) if labels is None: classes = unique_labels(y_true, y_pred) else: classes = np.asarray(labels) if normalize: cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] cm = np.around(cm, decimals=2) cm[np.isnan(cm)] = 0.0 if true_labels is None: true_classes = classes else: validate_labels(classes, true_labels, "true_labels") true_label_indexes = np.in1d(classes, true_labels) true_classes = classes[true_label_indexes] cm = cm[true_label_indexes] if pred_labels is None: pred_classes = classes else: validate_labels(classes, pred_labels, "pred_labels") pred_label_indexes = np.in1d(classes, pred_labels) pred_classes = classes[pred_label_indexes] cm = cm[:, pred_label_indexes] if title: ax.set_title(title, fontsize=title_fontsize) elif normalize: ax.set_title('Normalized Confusion Matrix', fontsize=title_fontsize) else: ax.set_title('Confusion Matrix', fontsize=title_fontsize) image = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.get_cmap(cmap)) plt.colorbar(mappable=image) x_tick_marks = np.arange(len(pred_classes)) y_tick_marks = np.arange(len(true_classes)) ax.set_xticks(x_tick_marks) ax.set_xticklabels(pred_classes, fontsize=text_fontsize, rotation=x_tick_rotation) ax.set_yticks(y_tick_marks) ax.set_yticklabels(true_classes, fontsize=text_fontsize) thresh = cm.max() / 2. if not hide_counts: for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): if not (hide_zeros and cm[i, j] == 0): ax.text(j, i, cm[i, j], horizontalalignment="center", verticalalignment="center", fontsize=text_fontsize, color="white" if cm[i, j] > thresh else "black") ax.set_ylabel('True label', fontsize=text_fontsize) ax.set_xlabel('Predicted label', fontsize=text_fontsize) ax.grid(False) return ax
def fit(self, X, y, vectorized=None): """Fits SVM classifer. Parameters ---------- X : np.ndarray, shape (-1, n) Input. y : np.array, shape (n) Targets vectorized : bool, default None Whether to use the vectorized/non-vectorized loss function. If using nonlinear kernel, then this must be false (until I fix it). If None, then vectorized will default to True if kernel is linear, and False if kernel is nonlinear. Returns ------- """ # My Input validation if self.kernel.name != 'linear' and vectorized: msg = 'Vectorized loss only works with linear kernel right now.' raise ValueError(msg) if vectorized is None: if self.kernel.name == 'linear': vectorized = True else: vectorized = False # Sklearn input validation X, y = check_X_y(X, y) # Check that X and y have correct shape self.classes_ = unique_labels(y) # Store the classes seen during fit if vectorized: loss = self._vectorized_loss else: loss = self._loss # SVM needs 1s and -1s y[y == 0] = -1 initial_alphas = np.random.rand(len(X)) # Define constraints # # Our constraints: # 1. sum_i(ai*yi)=0 # 2. ai >= 0 # # Scipy LinearConstraint format: # lb <= A.dot(x) <= ub # # Therefore: # Constraint 1: # A = di # lb = 0 # ub = 0 # Constraint 2: # A = 1 # lb = 0 # ub = np.inf # con1 = optimize.LinearConstraint(y, 0, 0) con2 = {'type': 'ineq', 'fun': lambda a: a} self.opt_result_ = optimize.minimize(loss, initial_alphas, constraints=(con1, con2), args=(X, y)) # Find indices of support vectors sv_idx = np.where(self.opt_result_.x > 0.001) self.sup_X_ = X[sv_idx] self.sup_y_ = y[sv_idx] self.sup_alphas_ = self.opt_result_.x[sv_idx] self.offset_ = self._compute_offset() return self
def plot_confusion_matrix(y_true, y_pred, classes, normalize=False, title=None, cmap=plt.cm.Blues, filename="confusion_matrix.png"): """ This function prints and plots the confusion matrix. Normalization can be applied by setting `normalize=True`. src: https://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py """ if not title: if normalize: title = 'Normalized confusion matrix' else: title = 'Confusion matrix, without normalization' # Compute confusion matrix cm = confusion_matrix(y_true, y_pred) # Only use the labels that appear in the data labels = unique_labels(y_true, y_pred) classes = classes[labels] if normalize: cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] print("Normalized confusion matrix") else: print('Confusion matrix, without normalization') print(cm) fig, ax = plt.subplots() im = ax.imshow(cm, interpolation='nearest', cmap=cmap) ax.figure.colorbar(im, ax=ax) # We want to show all ticks... ax.set( xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]), # ... and label them with the respective list entries xticklabels=classes, yticklabels=classes, title=title, ylabel='True label', xlabel='Predicted label') # Rotate the tick labels and set their alignment. plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor") # Loop over data dimensions and create text annotations. fmt = '.2f' if normalize else 'd' thresh = cm.max() / 2. for i in range(cm.shape[0]): for j in range(cm.shape[1]): ax.text(j, i, format(cm[i, j], fmt), ha="center", va="center", color="white" if cm[i, j] > thresh else "black") fig.tight_layout() fig.savefig(os.path.join(plot_dir, filename)) plt.close(fig)
def test_nn(w1, w2, w3, w4, b1, b2, b3, b4, x_test, y_test, num_classes): n = x_test.shape[0] classifications = four_nn(x_test, w1, w2, w3, w4, b1, b2, b3, b4, y_test, True) totalCorrect = 0 # Total correct counter # confusionMat = np.zeros((num_classes, num_classes)) for i in range(len(classifications)): if classifications[i] == y_test[i]: totalCorrect += 1 # Plot here title = 'Normalized confusion matrix' # Compute confusion matrix cm = confusion_matrix(y_test, classifications) classes = np.array([ "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat", "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot" ]) # Only use the labels that appear in the data classes = classes[unique_labels(y_test, classifications)] # Normalize confusino matrix cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] print(cm) fig, ax = plt.subplots() im = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues) ax.figure.colorbar(im, ax=ax) # We want to show all ticks... ax.set( xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]), # ... and label them with the respective list entries xticklabels=classes, yticklabels=classes, title=title, ylabel='True label', xlabel='Predicted label') # Rotate the tick labels and set their alignment. plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor") # Loop over data dimensions and create text annotations. fmt = '.2f' thresh = cm.max() / 2. for i in range(cm.shape[0]): for j in range(cm.shape[1]): ax.text(j, i, format(cm[i, j], fmt), ha="center", va="center", color="white" if cm[i, j] > thresh else "black") fig.tight_layout() avg_class_rate = totalCorrect / n class_rate_per_class = [0.0] * num_classes for i in range(num_classes): class_rate_per_class[i] = cm[i, i] print(">>> avg_class_rate =", avg_class_rate) print(">>> class_rate_per_class =", class_rate_per_class) plt.show() return avg_class_rate, class_rate_per_class
def classificationReportDict(trueY, predY, labels=None, targetNames=None, sampleWeight=None, alpha=0.1): report = dict() if labels is None: labels = unique_labels(trueY, predY) else: labels = np.asarray(labels) if targetNames is None: targetNames = [str(label) for label in labels] # Precision Recall F1 Support precision, recall, f1, support = \ precision_recall_fscore_support(trueY, predY, labels=labels, average=None, sample_weight=sampleWeight) # Specificity specificity = specificity_score(trueY, predY, labels=labels, average=None, sample_weight=sampleWeight) # Geometric mean gMean = geometric_mean_score(trueY, predY, labels=labels, average=None, sample_weight=sampleWeight) # Index balanced accuracy ibaGMeanScore = make_index_balanced_accuracy( alpha=alpha, squared=True)(geometric_mean_score) ibaGMean = ibaGMeanScore(trueY, predY, labels=labels, average=None, sample_weight=sampleWeight) for i, label in enumerate(labels): targetName = targetNames[i] report[targetName] = { 'Precision': precision[i], 'Recall': recall[i], 'F1': f1[i], 'Specificity': specificity[i], 'GMean': gMean[i], 'IbaGMean': ibaGMean[i], 'Support': support[i], } report['Weighted Avg'] = { 'Precision': np.average(precision, weights=support), 'Recall': np.average(recall, weights=support), 'F1': np.average(f1, weights=support), 'Specificity': np.average(specificity, weights=support), 'GMean': np.average(gMean, weights=support), 'IbaGMean': np.average(ibaGMean, weights=support), 'Support': np.sum(support) } report['Macro Avg'] = { 'Precision': np.average(precision), 'Recall': np.average(recall), 'F1': np.average(f1), 'Specificity': np.average(specificity), 'GMean': np.average(gMean), 'IbaGMean': np.average(ibaGMean), 'Support': np.sum(support) } # Accuracy accuracy = accuracy_score(trueY, predY, normalize=True, sample_weight=sampleWeight) report['Accuracy'] = accuracy return report
def y2scores(y, labels=None):
    labels = unique_labels(y) if labels is None else np.asarray(labels)
    scores = np.zeros((y.shape[0], labels.shape[0]))
    for n, label in enumerate(labels):
        scores[y == label, n] = 1
    return scores, labels
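# Hedged usage sketch: y2scores one-hot encodes the labels into a score matrix
# whose columns follow the returned (sorted) label order. Values are illustrative.
import numpy as np

y = np.array([2, 0, 2, 1])
scores, labels = y2scores(y)
print(labels)   # [0 1 2]
print(scores)   # [[0. 0. 1.]
                #  [1. 0. 0.]
                #  [0. 0. 1.]
                #  [0. 1. 0.]]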
def plot_confusion_matrix(y_true, y_pred, classes, normalize=False, title=None, cmap=plt.cm.Blues, save_path: Path = Path("./")): """ This function prints and plots the confusion matrix. Normalization can be applied by setting `normalize=True`. """ if not title: if normalize: title = 'Normalized confusion matrix' else: title = 'Confusion matrix, without normalization' # Compute confusion matrix cm = confusion_matrix(y_true, y_pred) # Only use the labels that appear in the data classes = classes[unique_labels(y_true, y_pred)] if normalize: cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] fig, ax = plt.subplots(figsize=(10, 10)) im = ax.imshow(cm, interpolation='nearest', cmap=cmap) tick_marks = np.arange(len(classes)) plt.xticks(tick_marks, fontsize=25) plt.yticks(tick_marks, fontsize=25) plt.xlabel('Predicted label', fontsize=25) plt.ylabel('True label', fontsize=25) plt.title(title, fontsize=30) divider = make_axes_locatable(ax) cax = divider.append_axes('right', size="5%", pad=0.15) cbar = ax.figure.colorbar(im, ax=ax, cax=cax) cbar.ax.tick_params(labelsize=20) # We want to show all ticks... ax.set( xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]), # ... and label them with the respective list entries xticklabels=classes, yticklabels=classes, # title=title, ylabel='True label', xlabel='Predicted label') # Rotate the tick labels and set their alignment. plt.setp(ax.get_xticklabels(), ha="right", rotation_mode="anchor") # Loop over data dimensions and create text annotations. fmt = '.2f' if normalize else 'd' thresh = cm.max() / 2. for i in range(cm.shape[0]): for j in range(cm.shape[1]): ax.text(j, i, format(cm[i, j], fmt), fontsize=20, ha="center", va="center", color="white" if cm[i, j] > thresh else "black") fig.tight_layout() plt.savefig(save_path)
def fit(self, X, y, sample_weight=None): """Fit training data. Args: X : X training vector y : y label vector sample_weight (optional): Required for compatibility with the scikit-learn Adaboost module. Defaults to None. Returns: self : Required for compatibility with the scikit-learn Adaboost module. """ self.classes_ = unique_labels(y) self.X_ = X self.y_ = y # for each feature in X for j in range(X.shape[1]): # S = { (x_1, y_1), (x_2, y_2), ..., (x_m, y_m) } S = np.hstack((X, np.array([y]).T)) # sort(S) using jth coordinate s.t. x1j <= x2j <= ... <= xmj # where j is the column and 1,2,...m are the rows S = S[np.argsort(S[:, j])] # remove duplicate values from consideration # this can cut down on the number of thresholds that are tested keys, indices = np.unique(S[:, j], return_index=True) unique = S[indices] # for each row in X[:, split], test the midpoint between consecutive # feature values to see if it would make a good threshold for row in range(0, unique.shape[0] - 1): # check the threshold using the greater than lambda # save the old configuration old_split = self.__split old_thresh = self.__threshold old_lambda = self.__inequality # assign a new configuration self.__inequality = self.__greater self.__split = j self.__threshold = unique[row, j] + ( (unique[row + 1, j] - unique[row, j]) / 2.0) y_pred = self.predict(X) error = len(y_pred[y_pred != y]) if error >= self.__error: self.__split = old_split self.__threshold = old_thresh self.__inequality = old_lambda else: self.__error = error # check the threshold using the less than lambda # save the old configuration old_split = self.__split old_thresh = self.__threshold old_lambda = self.__inequality # assign a new configuration self.__inequality = self.__lesser self.__split = j self.__threshold = unique[row, j] + ( (unique[row + 1, j] - unique[row, j]) / 2.0) y_pred = self.predict(X) error = len(y_pred[y_pred != y]) if error >= self.__error: self.__split = old_split self.__threshold = old_thresh self.__inequality = old_lambda else: self.__error = error return self
def remap_labels( y_true: Union[List, np.ndarray, pd.Series], y_pred: Union[List, np.ndarray, pd.Series], return_map: bool = False, ) -> Union[np.ndarray, Tuple[np.ndarray, Dict]]: """ Remaps a categorical labeling (such as one predicted by a clustering algorithm) to match the labels used by another similar labeling. Given two :math:`n`-length vectors describing a categorical labeling of :math:`n` samples, this method reorders the labels of the second vector (`y_pred`) so that as many samples as possible from the two label vectors are in the same category. Parameters ---------- y_true : array-like of shape (n_samples,) Ground truth labels, or, labels to map to. y_pred : array-like of shape (n_samples,) Labels to remap to match the categorical labeling of `y_true`. The categorical labeling of `y_pred` will be preserved exactly, but the labels used to denote the categories will be changed to best match the categories used in `y_true`. return_map : bool, optional Whether to return a dictionary where the keys are the original category labels from `y_pred` and the values are the new category labels that they were mapped to. Returns ------- remapped_y_pred : np.ndarray of shape (n_samples,) Same categorical labeling as that of `y_pred`, but with the category labels permuted to best match those of `y_true`. label_map : dict Mapping from the original labels of `y_pred` to the new labels which best resemble those of `y_true`. Only returned if `return_map` was True. Examples -------- >>> y_true = np.array([0,0,1,1,2,2]) >>> y_pred = np.array([2,2,1,1,0,0]) >>> remap_labels(y_true, y_pred) array([0, 0, 1, 1, 2, 2]) Notes ----- This method will work well when the label vectors describe a somewhat similar categorization of the data (as measured by metrics such as :func:`sklearn.metrics.adjusted_rand_score`, for example). When the categorizations are not similar, the remapping may not make sense (as such a remapping does not exist). For example, consider when one category in `y_true` is exactly split in half into two categories in `y_pred`. If this is the case, it is impossible to say which of the categories in `y_pred` match that original category from `y_true`. """ check_consistent_length(y_true, y_pred) true_type = type_of_target(y_true) pred_type = type_of_target(y_pred) valid_target_types = {"binary", "multiclass"} if (true_type not in valid_target_types) or (pred_type not in valid_target_types): msg = "Elements of `y_true` and `y_pred` must represent a valid binary or " msg += "multiclass labeling, see " msg += "https://scikit-learn.org/stable/modules/generated/sklearn.utils.multiclass.type_of_target.html" msg += " for more information." raise ValueError(msg) y_true = column_or_1d(y_true) y_pred = column_or_1d(y_pred) if not isinstance(return_map, bool): raise TypeError("return_map must be of type bool.") labels = unique_labels(y_true, y_pred) confusion_mat = confusion_matrix(y_true, y_pred, labels=labels) row_inds, col_inds = linear_sum_assignment(confusion_mat, maximize=True) label_map = dict(zip(labels[col_inds], labels[row_inds])) remapped_y_pred: np.ndarray = np.vectorize(label_map.get)(y_pred) if return_map: return remapped_y_pred, label_map else: return remapped_y_pred
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils.multiclass import unique_labels
import numpy as np

# import some data to play with
filename_LCP = "dataset11.csv"
df_LCP = pd.read_csv(filename_LCP)
df1 = pd.DataFrame(df_LCP)

X = df1.drop('Lanemarker', axis=1)
y = df1['Lanemarker'].astype(np.int64)
class_names = unique_labels(df1['Lanemarker'])

# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Run classifier
classifier = svm.SVC(kernel='linear')
y_pred = classifier.fit(X_train, y_train).predict(X_test)

# Compute confusion matrix
cm = confusion_matrix(y_test, y_pred)
print(cm)

# Show confusion matrix in a separate window
plt.matshow(cm)
plt.title('Confusion matrix')
plt.colorbar()
def plot_confusion_matrix(y_true, y_pred, classes, normalize=False, title=None, cmap=plt.cm.Blues): """ This function prints and plots the confusion matrix. Normalization can be applied by setting `normalize=True`. """ if not title: if normalize: title = 'Normalized confusion matrix' else: title = 'Confusion matrix, without normalization' # Compute confusion matrix cm = confusion_matrix(y_true, y_pred) # Only use the labels that appear in the data print(type(unique_labels(y_true, y_pred)), unique_labels(y_true, y_pred)) classes = classes[unique_labels(y_true, y_pred)] if normalize: cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] print("Normalized confusion matrix") else: print('Confusion matrix, without normalization') print(cm) fig, ax = plt.subplots() im = ax.imshow(cm, interpolation='nearest', cmap=cmap) ax.figure.colorbar(im, ax=ax) # We want to show all ticks... ax.set( xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]), # ... and label them with the respective list entries xticklabels=classes, yticklabels=classes, title=title, ylabel='True label', xlabel='Predicted label') # Rotate the tick labels and set their alignment. plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor") # Loop over data dimensions and create text annotations. fmt = '.2f' if normalize else 'd' thresh = cm.max() / 2. for i in range(cm.shape[0]): for j in range(cm.shape[1]): ax.text(j, i, format(cm[i, j], fmt), ha="center", va="center", color="white" if cm[i, j] > thresh else "black") fig.tight_layout() return ax
def classification_report(y_true, y_pred):
    '''
    Computes classification metrics
    :param y_true - original class label
    :param y_pred - predicted class label
    :return precision, recall for each class; micro_f1 measure, macro_f1 measure
    '''
    last_line_heading = 'avg / total'
    final_line_heading = 'final score'

    labels = unique_labels(y_true, y_pred)
    width = len(last_line_heading)
    target_names = ['{0}'.format(l) for l in labels]

    headers = ["precision", "recall", "f1-score", "support"]
    fmt = '%% %ds' % width  # first column: class name
    fmt += ' '
    fmt += ' '.join(['% 9s' for _ in headers])
    fmt += '\n'

    headers = [""] + headers
    report = fmt % tuple(headers)
    report += '\n'

    p, r, f1, s = precision_recall_fscore_support(y_true, y_pred,
                                                  labels=labels,
                                                  average=None)

    f1_macro = 0
    precision_macro = 0
    recall_macro = 0
    for i, label in enumerate(labels):
        values = [target_names[i]]
        f1_macro += f1[i]
        precision_macro += p[i]
        recall_macro += r[i]
        for v in (p[i], r[i], f1[i]):
            values += ["{0:0.5f}".format(v)]
        values += ["{0}".format(s[i])]
        report += fmt % tuple(values)

    report += '\n'

    # compute averages
    values = [last_line_heading]
    for v in (np.average(p, weights=s),
              np.average(r, weights=s),
              np.average(f1, weights=s)):
        values += ["{0:0.5f}".format(v)]
    values += ['{0}'.format(np.sum(s))]
    report += fmt % tuple(values)

    values = [final_line_heading]
    for v in (precision_macro, recall_macro, f1_macro):
        values += ["{0:0.5f}".format(v / labels.size)]
    values += ['{0}'.format(np.sum(s))]
    report += fmt % tuple(values)

    return report
def evaluate_intents( intent_results: List[IntentEvaluationResult], report_folder: Optional[Text], successes_filename: Optional[Text], errors_filename: Optional[Text], confmat_filename: Optional[Text], intent_hist_filename: Optional[Text], output_folder: Optional[Text] = None, ) -> Dict: # pragma: no cover """Creates a confusion matrix and summary statistics for intent predictions. Log samples which could not be classified correctly and save them to file. Creates a confidence histogram which is saved to file. Wrong and correct prediction confidences will be plotted in separate bars of the same histogram plot. Only considers those examples with a set intent. Others are filtered out. Returns a dictionary of containing the evaluation result.""" # remove empty intent targets num_examples = len(intent_results) intent_results = remove_empty_intent_examples(intent_results) logger.info("Intent Evaluation: Only considering those " "{} examples that have a defined intent out " "of {} examples".format(len(intent_results), num_examples)) target_intents, predicted_intents = _targets_predictions_from( intent_results) if report_folder: report, precision, f1, accuracy = get_evaluation_metrics( target_intents, predicted_intents, output_dict=True) report_filename = os.path.join(report_folder, "intent_report.json") utils.write_json_to_file(report_filename, report) logger.info( "Classification report saved to {}.".format(report_filename)) else: report, precision, f1, accuracy = get_evaluation_metrics( target_intents, predicted_intents) if isinstance(report, str): log_evaluation_table(report, precision, f1, accuracy) if successes_filename: if output_folder: successes_filename = os.path.join(output_folder, successes_filename) # save classified samples to file for debugging collect_nlu_successes(intent_results, successes_filename) if errors_filename: if output_folder: errors_filename = os.path.join(output_folder, errors_filename) # log and save misclassified samples to file for debugging collect_nlu_errors(intent_results, errors_filename) if confmat_filename: from sklearn.metrics import confusion_matrix from sklearn.utils.multiclass import unique_labels import matplotlib.pyplot as plt if output_folder: confmat_filename = os.path.join(output_folder, confmat_filename) intent_hist_filename = os.path.join(output_folder, intent_hist_filename) cnf_matrix = confusion_matrix(target_intents, predicted_intents) labels = unique_labels(target_intents, predicted_intents) plot_confusion_matrix( cnf_matrix, classes=labels, title="Intent Confusion matrix", out=confmat_filename, ) plt.show(block=False) plot_intent_confidences(intent_results, intent_hist_filename) plt.show(block=False) predictions = [{ "text": res.message, "intent": res.intent_target, "predicted": res.intent_prediction, "confidence": res.confidence, } for res in intent_results] return { "predictions": predictions, "report": report, "precision": precision, "f1_score": f1, "accuracy": accuracy, }
def confusion_matrix(self, threshold, classes, test_label=None, plot_type='test', normalize=False, title=None, cmap=plt.cm.Blues): """ Refer to: https://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html This function prints and plots the confusion matrix. Normalization can be applied by setting `normalize=True`. """ if plot_type == 'train': y_true = self.y_train y_pred = np.where(self.y_oof > threshold, 1, 0) elif plot_type == 'test': y_true = test_label y_pred = np.where(self.y_pred > threshold, 1, 0) if not title: if normalize: title = 'Normalized confusion matrix' else: title = 'Confusion matrix, without normalization' # Compute confusion matrix cm = confusion_matrix(y_true, y_pred) # Only use the labels that appear in the data classes = classes[unique_labels(y_true, y_pred)] if normalize: cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] print("Normalized confusion matrix") else: print('Confusion matrix, without normalization') print(cm) fig, ax = plt.subplots(figsize=(7, 7)) im = ax.imshow(cm, interpolation='nearest', cmap=cmap) tick_marks = np.arange(len(classes)) plt.xticks(tick_marks, fontsize=20) plt.yticks(tick_marks, fontsize=20) plt.xlabel('Predicted label', fontsize=20) plt.ylabel('True label', fontsize=20) plt.title(title, fontsize=20) divider = make_axes_locatable(ax) cax = divider.append_axes('right', size="5%", pad=0.15) cbar = ax.figure.colorbar(im, ax=ax, cax=cax) cbar.ax.tick_params(labelsize=20) # We want to show all ticks... ax.set( xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]), # ... and label them with the respective list entries xticklabels=classes, yticklabels=classes, # title=title, ylabel='True label', xlabel='Predicted label') # Rotate the tick labels and set their alignment. plt.setp(ax.get_xticklabels(), ha="right", rotation_mode="anchor") # Loop over data dimensions and create text annotations. fmt = '.2f' if normalize else 'd' thresh = cm.max() / 2. for i in range(cm.shape[0]): for j in range(cm.shape[1]): ax.text(j, i, format(cm[i, j], fmt), fontsize=20, ha="center", va="center", color="white" if cm[i, j] > thresh else "black") fig.tight_layout() return ax
def plot_confusion_matrix(model, partition): r"""Draw the confusion matrix. Parameters ---------- model : alphapy.Model The model object with plotting specifications. partition : alphapy.Partition Reference to the dataset. Returns ------- None : None References ---------- http://scikit-learn.org/stable/modules/model_evaluation.html#confusion-matrix """ logger.info("Generating Confusion Matrices") plot_dir = get_plot_directory(model) pstring = datasets[partition] # For classification only if model.specs['model_type'] != ModelType.classification: logger.info('Confusion Matrix is for classification only') return None # Get X, Y for correct partition. X, y = get_partition_data(model, partition) # Plot Parameters np.set_printoptions(precision=2) cmap = plt.cm.Blues fmt = '.2f' # Generate a Confusion Matrix for each algorithm for algo in model.algolist: logger.info("Confusion Matrix for Algorithm: %s", algo) # get predictions for this partition y_pred = model.preds[(algo, partition)] # compute confusion matrix cm = confusion_matrix(y, y_pred) logger.info('Confusion Matrix:') logger.info('%s', cm) # normalize confusion matrix cm_pct = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] # initialize plot _, ax = plt.subplots() # set the title of the confusion matrix title = algo + " Confusion Matrix: " + pstring + " [" + str( np.sum(cm)) + "]" plt.title(title) # only use the labels that appear in the data classes = unique_labels(y, y_pred) # show all ticks ax.set(xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]), xticklabels=classes, yticklabels=classes, title=title, ylabel='True Label', xlabel='Predicted Label') # rotate the tick labels and set their alignment plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor") # loop over data dimensions and create text annotations thresh = (cm_pct.max() + cm_pct.min()) / 2.0 for i in range(cm.shape[0]): for j in range(cm.shape[1]): cm_text = format(cm_pct[i, j], fmt) + " [" + str(cm[i, j]) + "]" ax.text(j, i, cm_text, ha="center", va="center", color="white" if cm_pct[i, j] >= thresh else "black") # show the color bar im = ax.imshow(cm_pct, interpolation='nearest', cmap=cmap) ax.figure.colorbar(im, ax=ax) # save the chart tag = USEP.join([pstring, algo]) write_plot('matplotlib', plt, 'confusion', tag, plot_dir)
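Both plotting helpers above rely on the same pairing of confusion_matrix with unique_labels to keep tick labels aligned with the matrix; a minimal, self-contained example on toy labels:

from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels

y_true = ["cat", "dog", "cat", "bird"]
y_pred = ["cat", "cat", "cat", "bird"]

# unique_labels returns the sorted union of labels seen in y_true and y_pred,
# which is exactly the row/column order confusion_matrix uses by default
labels = unique_labels(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred, labels=labels)
print(labels)  # ['bird' 'cat' 'dog']
print(cm)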
def confusion_matrix(y_true, y_pred, *, labels=None, sample_weight=None, normalize=None): """Compute confusion matrix to evaluate the accuracy of a classification. By definition a confusion matrix :math:`C` is such that :math:`C_{i, j}` is equal to the number of observations known to be in group :math:`i` and predicted to be in group :math:`j`. Thus in binary classification, the count of true negatives is :math:`C_{0,0}`, false negatives is :math:`C_{1,0}`, true positives is :math:`C_{1,1}` and false positives is :math:`C_{0,1}`. Read more in the :ref:`User Guide <confusion_matrix>`. Parameters ---------- y_true : array-like of shape (n_samples,) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) Estimated targets as returned by a classifier. labels : array-like of shape (n_classes), default=None List of labels to index the matrix. This may be used to reorder or select a subset of labels. If ``None`` is given, those that appear at least once in ``y_true`` or ``y_pred`` are used in sorted order. sample_weight : array-like of shape (n_samples,), default=None Sample weights. .. versionadded:: 0.18 normalize : {'true', 'pred', 'all'}, default=None Normalizes confusion matrix over the true (rows), predicted (columns) conditions or all the population. If None, confusion matrix will not be normalized. Returns ------- C : ndarray of shape (n_classes, n_classes) Confusion matrix whose i-th row and j-th column entry indicates the number of samples with true label being i-th class and prediced label being j-th class. References ---------- .. [1] `Wikipedia entry for the Confusion matrix <https://en.wikipedia.org/wiki/Confusion_matrix>`_ (Wikipedia and other references may use a different convention for axes) Examples -------- >>> from sklearn.metrics import confusion_matrix >>> y_true = [2, 0, 2, 2, 0, 1] >>> y_pred = [0, 0, 2, 2, 0, 2] >>> confusion_matrix(y_true, y_pred) array([[2, 0, 0], [0, 0, 1], [1, 0, 2]]) >>> y_true = ["cat", "ant", "cat", "cat", "ant", "bird"] >>> y_pred = ["ant", "ant", "cat", "cat", "ant", "cat"] >>> confusion_matrix(y_true, y_pred, labels=["ant", "bird", "cat"]) array([[2, 0, 0], [0, 0, 1], [1, 0, 2]]) In the binary case, we can extract true positives, etc as follows: >>> tn, fp, fn, tp = confusion_matrix([0, 1, 0, 1], [1, 1, 1, 0]).ravel() >>> (tn, fp, fn, tp) (0, 2, 1, 1) """ y_type, y_true, y_pred = _check_targets(y_true, y_pred) if y_type not in ("binary", "multiclass"): raise ValueError("%s is not supported" % y_type) if labels is None: labels = unique_labels(y_true, y_pred) else: labels = np.asarray(labels) n_labels = labels.size if n_labels == 0: raise ValueError("'labels' should contains at least one label.") elif y_true.size == 0: return np.zeros((n_labels, n_labels), dtype=np.int) elif np.all([l not in y_true for l in labels]): raise ValueError("At least one label specified must be in y_true") if sample_weight is None: sample_weight = np.ones(y_true.shape[0], dtype=np.int64) else: sample_weight = np.asarray(sample_weight) check_consistent_length(y_true, y_pred, sample_weight) if normalize not in ['true', 'pred', 'all', None]: raise ValueError("normalize must be one of {'true', 'pred', " "'all', None}") n_labels = labels.size label_to_ind = {y: x for x, y in enumerate(labels)} # convert yt, yp into index y_pred = np.array([label_to_ind.get(x, n_labels + 1) for x in y_pred]) y_true = np.array([label_to_ind.get(x, n_labels + 1) for x in y_true]) # intersect y_pred, y_true with labels, eliminate items not in labels ind = 
np.logical_and(y_pred < n_labels, y_true < n_labels) y_pred = y_pred[ind] y_true = y_true[ind] # also eliminate weights of eliminated items sample_weight = sample_weight[ind] # Choose the accumulator dtype to always have high precision if sample_weight.dtype.kind in {'i', 'u', 'b'}: dtype = np.int64 else: dtype = np.float64 cm = coo_matrix( (sample_weight, (y_true, y_pred)), shape=(n_labels, n_labels), dtype=dtype, ).toarray() with np.errstate(all='ignore'): if normalize == 'true': cm = cm / cm.sum(axis=1, keepdims=True) elif normalize == 'pred': cm = cm / cm.sum(axis=0, keepdims=True) elif normalize == 'all': cm = cm / cm.sum() cm = np.nan_to_num(cm) return cm
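A short usage sketch of the normalize argument handled at the end of this implementation:

from sklearn.metrics import confusion_matrix

y_true = [0, 0, 1, 1, 1]
y_pred = [0, 1, 1, 1, 0]

print(confusion_matrix(y_true, y_pred))                    # raw counts
print(confusion_matrix(y_true, y_pred, normalize="true"))  # each row sums to 1
print(confusion_matrix(y_true, y_pred, normalize="pred"))  # each column sums to 1
print(confusion_matrix(y_true, y_pred, normalize="all"))   # all entries sum to 1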
y_pos = np.arange(len(side_effects)) plt.bar(y_pos, counts, color=(0.5, 0.1, 0.5, 0.6)) plt.title('Distribution of side effect counts') plt.xlabel('side effect') plt.ylabel('count') plt.xticks(y_pos, side_effects) plt.show() from sklearn.metrics import confusion_matrix from sklearn.utils.multiclass import unique_labels import matplotlib.cm as cm matrix = confusion_matrix(labels_test, preds) classes = unique_labels(labels_test, preds) matrix = matrix.astype('float') / matrix.sum(axis=1)[:, np.newaxis] fig, ax = plt.subplots() im = ax.imshow(matrix, interpolation='nearest', cmap=cm.RdPu) ax.figure.colorbar(im, ax=ax) # We want to show all ticks... ax.set( xticks=np.arange(matrix.shape[1]), yticks=np.arange(matrix.shape[0]), # ... and label them with the respective list entries xticklabels=classes, yticklabels=classes, title="SVM DDI Severity Classification: Confusion Matrix", ylabel='True label', xlabel='Predicted label')
def sensitivity_specificity_support(y_true, y_pred, labels=None, pos_label=1, average=None, warn_for=('sensitivity', 'specificity'), sample_weight=None): """Compute sensitivity, specificity, and support for each class The sensitivity is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of true positives and ``fn`` the number of false negatives. The sensitivity quantifies the ability to avoid false negatives_[1]. The specificity is the ratio ``tn / (tn + fp)`` where ``tn`` is the number of true negatives and ``fn`` the number of false negatives. The specificity quantifies the ability to avoid false positives_[1]. The support is the number of occurrences of each class in ``y_true``. If ``pos_label is None`` and in binary classification, this function returns the average sensitivity and specificity if ``average`` is one of ``'weighted'``. Read more in the :ref:`User Guide <sensitivity_specificity>`. Parameters ---------- y_true : ndarray, shape (n_samples, ) Ground truth (correct) target values. y_pred : ndarray, shape (n_samples, ) Estimated targets as returned by a classifier. labels : list, optional The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. For multilabel targets, labels are column indices. By default, all labels in ``y_true`` and ``y_pred`` are used in sorted order. pos_label : str or int, optional (default=1) The class to report if ``average='binary'`` and the data is binary. If the data are multiclass, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. average : str or None, optional (default=None) If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: ``'binary'``: Only report results for the class specified by ``pos_label``. This is applicable only if targets (``y_{true,pred}``) are binary. ``'micro'``: Calculate metrics globally by counting the total true positives, false negatives and false positives. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). This alters 'macro' to account for label imbalance; it can result in an F-score that is not between precision and recall. ``'samples'``: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from :func:`accuracy_score`). warn_for : tuple or set, for internal use This determines which warnings will be made in the case that this function is being used to return only one of its metrics. sample_weight : ndarray, shape (n_samples, ) Sample weights. Returns ------- sensitivity : float (if ``average`` = None) or ndarray, \ shape (n_unique_labels, ) specificity : float (if ``average`` = None) or ndarray, \ shape (n_unique_labels, ) support : int (if ``average`` = None) or ndarray, \ shape (n_unique_labels, ) The number of occurrences of each label in ``y_true``. References ---------- .. 
[1] `Wikipedia entry for the Sensitivity and specificity <https://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_ Examples -------- >>> import numpy as np >>> from imblearn.metrics import sensitivity_specificity_support >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig']) >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog']) >>> sensitivity_specificity_support(y_true, y_pred, average='macro') (0.33333333333333331, 0.66666666666666663, None) >>> sensitivity_specificity_support(y_true, y_pred, average='micro') (0.33333333333333331, 0.66666666666666663, None) >>> sensitivity_specificity_support(y_true, y_pred, average='weighted') (0.33333333333333331, 0.66666666666666663, None) """ average_options = (None, 'micro', 'macro', 'weighted', 'samples') if average not in average_options and average != 'binary': raise ValueError('average has to be one of ' + str(average_options)) y_type, y_true, y_pred = _check_targets(y_true, y_pred) present_labels = unique_labels(y_true, y_pred) if average == 'binary': if y_type == 'binary': if pos_label not in present_labels: if len(present_labels) < 2: # Only negative labels return (0., 0., 0) else: raise ValueError("pos_label=%r is not a valid label: %r" % (pos_label, present_labels)) labels = [pos_label] else: raise ValueError("Target is %s but average='binary'. Please " "choose another average setting." % y_type) elif pos_label not in (None, 1): warnings.warn( "Note that pos_label (set to %r) is ignored when " "average != 'binary' (got %r). You may use " "labels=[pos_label] to specify a single positive class." % (pos_label, average), UserWarning) if labels is None: labels = present_labels n_labels = None else: n_labels = len(labels) labels = np.hstack( [labels, np.setdiff1d(present_labels, labels, assume_unique=True)]) # Calculate tp_sum, pred_sum, true_sum ### if y_type.startswith('multilabel'): raise ValueError('imblearn does not support multilabel') elif average == 'samples': raise ValueError("Sample-based precision, recall, fscore is " "not meaningful outside multilabel " "classification. See the accuracy_score instead.") else: le = LabelEncoder() le.fit(labels) y_true = le.transform(y_true) y_pred = le.transform(y_pred) sorted_labels = le.classes_ # labels are now from 0 to len(labels) - 1 -> use bincount tp = y_true == y_pred tp_bins = y_true[tp] if sample_weight is not None: tp_bins_weights = np.asarray(sample_weight)[tp] else: tp_bins_weights = None if len(tp_bins): tp_sum = np.bincount( tp_bins, weights=tp_bins_weights, minlength=len(labels)) else: # Pathological case true_sum = pred_sum = tp_sum = np.zeros(len(labels)) if len(y_pred): pred_sum = np.bincount( y_pred, weights=sample_weight, minlength=len(labels)) if len(y_true): true_sum = np.bincount( y_true, weights=sample_weight, minlength=len(labels)) # Compute the true negative tn_sum = y_true.size - (pred_sum + true_sum - tp_sum) # Retain only selected labels indices = np.searchsorted(sorted_labels, labels[:n_labels]) tp_sum = tp_sum[indices] true_sum = true_sum[indices] pred_sum = pred_sum[indices] tn_sum = tn_sum[indices] if average == 'micro': tp_sum = np.array([tp_sum.sum()]) pred_sum = np.array([pred_sum.sum()]) true_sum = np.array([true_sum.sum()]) tn_sum = np.array([tn_sum.sum()]) # Finally, we have all our sufficient statistics. Divide! # with np.errstate(divide='ignore', invalid='ignore'): # Divide, and on zero-division, set scores to 0 and warn: # Oddly, we may get an "invalid" rather than a "divide" error # here. 
specificity = _prf_divide(tn_sum, tn_sum + pred_sum - tp_sum, 'specificity', 'predicted', average, warn_for) sensitivity = _prf_divide(tp_sum, true_sum, 'sensitivity', 'true', average, warn_for) # Average the results if average == 'weighted': weights = true_sum if weights.sum() == 0: return 0, 0, None elif average == 'samples': weights = sample_weight else: weights = None if average is not None: assert average != 'binary' or len(specificity) == 1 specificity = np.average(specificity, weights=weights) sensitivity = np.average(sensitivity, weights=weights) true_sum = None # return no support return sensitivity, specificity, true_sum
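Usage follows the docstring above; with average=None the per-class values come back as arrays ordered like unique_labels(y_true, y_pred):

import numpy as np
from imblearn.metrics import sensitivity_specificity_support

y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig'])
y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog'])

# macro average, as in the docstring example
print(sensitivity_specificity_support(y_true, y_pred, average='macro'))
# per-class sensitivity, specificity and support
print(sensitivity_specificity_support(y_true, y_pred, average=None))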
def classification_report(y_true, y_pred, labels=None, target_names=None, sample_weight=None): if labels is None: labels = unique_labels(y_true, y_pred) else: labels = np.asarray(labels) last_line_heading = 'avg / total' if target_names is None: width = len(last_line_heading) target_names = ['%s' % l for l in labels] else: width = max(len(cn) for cn in target_names) width = max(width, len(last_line_heading)) headers = ["precision", "recall", "f1-score", "support"] fmt = '%% %ds' % width # first column: class name fmt += ' ' fmt += ' '.join(['% 9s' for _ in headers]) fmt += '\n' headers = [""] + headers report = fmt % tuple(headers) report += '\n' p, r, f1, s = precision_recall_fscore_support(y_true, y_pred, labels=labels, average=None, sample_weight=sample_weight) microf1 = f1_score(y_true, y_pred, average='micro') micropre = precision_score(y_true, y_pred, average='micro') microrec = recall_score(y_true, y_pred, average='micro') for i, label in enumerate(labels): values = [target_names[i]] for v in (p[i], r[i], f1[i]): values += ["{0:0.4f}".format(v)] values += ["{0}".format(s[i])] report += fmt % tuple(values) report += '\n' # compute averages values = [last_line_heading] for v in (np.average(p, weights=s), np.average(r, weights=s), np.average(f1, weights=s)): values += ["{0:0.4f}".format(v)] values += ['{0}'.format(np.sum(s))] report += fmt % tuple(values) values = ["micro avg"] values += ["{0:0.4f}".format(micropre)] values += ["{0:0.4f}".format(microrec)] values += ["{0:0.4f}".format(microf1)] values += ['{0}'.format(np.sum(s))] report += fmt % tuple(values) return report
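A quick usage sketch for this report variant, assuming it is defined in the current session; the imports below cover the names the helper references but does not import itself (numpy, the sklearn per-class and micro-average scorers, and unique_labels):

import numpy as np
from sklearn.metrics import (precision_recall_fscore_support, f1_score,
                             precision_score, recall_score)
from sklearn.utils.multiclass import unique_labels

y_true = [0, 1, 2, 2, 2]
y_pred = [0, 0, 2, 2, 1]
# prints per-class rows, an "avg / total" line and an extra "micro avg" line
print(classification_report(y_true, y_pred,
                            target_names=["class 0", "class 1", "class 2"]))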
def classification_report_imbalanced(y_true, y_pred, labels=None, target_names=None, sample_weight=None, digits=2, alpha=0.1): """Build a classification report based on metrics used with imbalanced dataset Specific metrics have been proposed to evaluate the classification performed on imbalanced dataset. This report compiles the state-of-the-art metrics: precision/recall/specificity, geometric mean, and index balanced accuracy of the geometric mean. Parameters ---------- y_true : ndarray, shape (n_samples, ) Ground truth (correct) target values. y_pred : ndarray, shape (n_samples, ) Estimated targets as returned by a classifier. labels : list, optional The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. target_names : list of strings, optional Optional display names matching the labels (same order). sample_weight : ndarray, shape (n_samples, ) Sample weights. digits : int, optional (default=2) Number of digits for formatting output floating point values alpha : float, optional (default=0.1) Weighting factor. Returns ------- report : string Text summary of the precision, recall, specificity, geometric mean, and index balanced accuracy. Examples -------- >>> import numpy as np >>> from imblearn.metrics import classification_report_imbalanced >>> y_true = [0, 1, 2, 2, 2] >>> y_pred = [0, 0, 2, 2, 1] # doctest : +NORMALIZE_WHITESPACE >>> target_names = ['class 0', 'class 1', \ 'class 2'] # doctest : +NORMALIZE_WHITESPACE >>> print(classification_report_imbalanced(y_true, y_pred, \ target_names=target_names)) pre rec spe f1 geo iba\ sup <BLANKLINE> class 0 0.50 1.00 0.75 0.67 0.87 0.77\ 1 class 1 0.00 0.00 0.75 0.00 0.00 0.00\ 1 class 2 1.00 0.67 1.00 0.80 0.82 0.64\ 3 <BLANKLINE> avg / total 0.70 0.60 0.90 0.61 0.66 0.54\ 5 <BLANKLINE> """ if labels is None: labels = unique_labels(y_true, y_pred) else: labels = np.asarray(labels) last_line_heading = 'avg / total' if target_names is None: target_names = ['%s' % l for l in labels] name_width = max(len(cn) for cn in target_names) width = max(name_width, len(last_line_heading), digits) headers = ["pre", "rec", "spe", "f1", "geo", "iba", "sup"] fmt = '%% %ds' % width # first column: class name fmt += ' ' fmt += ' '.join(['% 9s' for _ in headers]) fmt += '\n' headers = [""] + headers report = fmt % tuple(headers) report += '\n' # Compute the different metrics # Precision/recall/f1 precision, recall, f1, support = precision_recall_fscore_support( y_true, y_pred, labels=labels, average=None, sample_weight=sample_weight) # Specificity specificity = specificity_score( y_true, y_pred, labels=labels, average=None, sample_weight=sample_weight) # Geometric mean geo_mean = geometric_mean_score( y_true, y_pred, labels=labels, average=None, sample_weight=sample_weight) # Index balanced accuracy iba_gmean = make_index_balanced_accuracy( alpha=alpha, squared=True)(geometric_mean_score) iba = iba_gmean( y_true, y_pred, labels=labels, average=None, sample_weight=sample_weight) for i, label in enumerate(labels): values = [target_names[i]] for v in (precision[i], recall[i], specificity[i], f1[i], geo_mean[i], iba[i]): values += ["{0:0.{1}f}".format(v, digits)] values += ["{0}".format(support[i])] report += fmt % tuple(values) report += '\n' # compute averages values = [last_line_heading] for v in 
(np.average(precision, weights=support), np.average( recall, weights=support), np.average(specificity, weights=support), np.average(f1, weights=support), np.average( geo_mean, weights=support), np.average(iba, weights=support)): values += ["{0:0.{1}f}".format(v, digits)] values += ['{0}'.format(np.sum(support))] report += fmt % tuple(values) return report
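The docstring example above can be run as-is once imblearn is installed:

from imblearn.metrics import classification_report_imbalanced

y_true = [0, 1, 2, 2, 2]
y_pred = [0, 0, 2, 2, 1]
target_names = ['class 0', 'class 1', 'class 2']
print(classification_report_imbalanced(y_true, y_pred, target_names=target_names))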
def sensitivity_specificity_support( y_true, y_pred, labels=None, pos_label=1, average=None, warn_for=("sensitivity", "specificity"), sample_weight=None, ): """Compute sensitivity, specificity, and support for each class The sensitivity is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of true positives and ``fn`` the number of false negatives. The sensitivity quantifies the ability to avoid false negatives_[1]. The specificity is the ratio ``tn / (tn + fp)`` where ``tn`` is the number of true negatives and ``fn`` the number of false negatives. The specificity quantifies the ability to avoid false positives_[1]. The support is the number of occurrences of each class in ``y_true``. If ``pos_label is None`` and in binary classification, this function returns the average sensitivity and specificity if ``average`` is one of ``'weighted'``. Read more in the :ref:`User Guide <sensitivity_specificity>`. Parameters ---------- y_true : ndarray, shape (n_samples, ) Ground truth (correct) target values. y_pred : ndarray, shape (n_samples, ) Estimated targets as returned by a classifier. labels : list, optional The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. For multilabel targets, labels are column indices. By default, all labels in ``y_true`` and ``y_pred`` are used in sorted order. pos_label : str or int, optional (default=1) The class to report if ``average='binary'`` and the data is binary. If the data are multiclass, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. average : str or None, optional (default=None) If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: ``'binary'``: Only report results for the class specified by ``pos_label``. This is applicable only if targets (``y_{true,pred}``) are binary. ``'micro'``: Calculate metrics globally by counting the total true positives, false negatives and false positives. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). This alters 'macro' to account for label imbalance; it can result in an F-score that is not between precision and recall. ``'samples'``: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from :func:`accuracy_score`). warn_for : tuple or set, for internal use This determines which warnings will be made in the case that this function is being used to return only one of its metrics. sample_weight : ndarray, shape (n_samples, ) Sample weights. Returns ------- sensitivity : float (if ``average`` = None) or ndarray, \ shape (n_unique_labels, ) specificity : float (if ``average`` = None) or ndarray, \ shape (n_unique_labels, ) support : int (if ``average`` = None) or ndarray, \ shape (n_unique_labels, ) The number of occurrences of each label in ``y_true``. References ---------- .. 
[1] `Wikipedia entry for the Sensitivity and specificity <https://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_ Examples -------- >>> import numpy as np >>> from imblearn.metrics import sensitivity_specificity_support >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig']) >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog']) >>> sensitivity_specificity_support(y_true, y_pred, average='macro') (0.33333333333333331, 0.66666666666666663, None) >>> sensitivity_specificity_support(y_true, y_pred, average='micro') (0.33333333333333331, 0.66666666666666663, None) >>> sensitivity_specificity_support(y_true, y_pred, average='weighted') (0.33333333333333331, 0.66666666666666663, None) """ average_options = (None, "micro", "macro", "weighted", "samples") if average not in average_options and average != "binary": raise ValueError("average has to be one of " + str(average_options)) y_type, y_true, y_pred = _check_targets(y_true, y_pred) present_labels = unique_labels(y_true, y_pred) if average == "binary": if y_type == "binary": if pos_label not in present_labels: if len(present_labels) < 2: # Only negative labels return (0.0, 0.0, 0) else: raise ValueError("pos_label=%r is not a valid label: %r" % (pos_label, present_labels)) labels = [pos_label] else: raise ValueError("Target is %s but average='binary'. Please " "choose another average setting." % y_type) elif pos_label not in (None, 1): warnings.warn( "Note that pos_label (set to %r) is ignored when " "average != 'binary' (got %r). You may use " "labels=[pos_label] to specify a single positive class." % (pos_label, average), UserWarning, ) if labels is None: labels = present_labels n_labels = None else: n_labels = len(labels) labels = np.hstack( [labels, np.setdiff1d(present_labels, labels, assume_unique=True)]) # Calculate tp_sum, pred_sum, true_sum ### if y_type.startswith("multilabel"): raise ValueError("imblearn does not support multilabel") elif average == "samples": raise ValueError("Sample-based precision, recall, fscore is " "not meaningful outside multilabel " "classification. See the accuracy_score instead.") else: le = LabelEncoder() le.fit(labels) y_true = le.transform(y_true) y_pred = le.transform(y_pred) sorted_labels = le.classes_ # labels are now from 0 to len(labels) - 1 -> use bincount tp = y_true == y_pred tp_bins = y_true[tp] if sample_weight is not None: tp_bins_weights = np.asarray(sample_weight)[tp] else: tp_bins_weights = None if len(tp_bins): tp_sum = np.bincount(tp_bins, weights=tp_bins_weights, minlength=len(labels)) else: # Pathological case true_sum = pred_sum = tp_sum = np.zeros(len(labels)) if len(y_pred): pred_sum = np.bincount(y_pred, weights=sample_weight, minlength=len(labels)) if len(y_true): true_sum = np.bincount(y_true, weights=sample_weight, minlength=len(labels)) # Compute the true negative tn_sum = y_true.size - (pred_sum + true_sum - tp_sum) # Retain only selected labels indices = np.searchsorted(sorted_labels, labels[:n_labels]) tp_sum = tp_sum[indices] true_sum = true_sum[indices] pred_sum = pred_sum[indices] tn_sum = tn_sum[indices] if average == "micro": tp_sum = np.array([tp_sum.sum()]) pred_sum = np.array([pred_sum.sum()]) true_sum = np.array([true_sum.sum()]) tn_sum = np.array([tn_sum.sum()]) # Finally, we have all our sufficient statistics. Divide! # with np.errstate(divide="ignore", invalid="ignore"): # Divide, and on zero-division, set scores to 0 and warn: # Oddly, we may get an "invalid" rather than a "divide" error # here. 
specificity = _prf_divide( tn_sum, tn_sum + pred_sum - tp_sum, "specificity", "predicted", average, warn_for, ) sensitivity = _prf_divide(tp_sum, true_sum, "sensitivity", "true", average, warn_for) # Average the results if average == "weighted": weights = true_sum if weights.sum() == 0: return 0, 0, None elif average == "samples": weights = sample_weight else: weights = None if average is not None: assert average != "binary" or len(specificity) == 1 specificity = np.average(specificity, weights=weights) sensitivity = np.average(sensitivity, weights=weights) true_sum = None # return no support return sensitivity, specificity, true_sum
def plot_confusion_matrix(y_true, y_pred, classes, cmap=plt.cm.Blues): # Compute confusion matrix cm = confusion_matrix(y_true, y_pred) # Only use the labels that appear in the data classes = np.array(classes)[unique_labels(y_true, y_pred)] # print(cm) fig, (ax1, ax2 )= plt.subplots(2,1, figsize=(8, 10)) im = ax1.imshow(cm, interpolation='nearest', cmap=cmap) ax1.figure.colorbar(im, ax=ax1) # We want to show all ticks... ax1.set(xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]), # ... and label them with the respective list entries xticklabels=classes, yticklabels=classes, title='Confusion Matrix', ylabel='True label', xlabel='Predicted label') # Rotate the tick labels and set their alignment. plt.setp(ax1.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor") # Loop over data dimensions and create text annotations. fmt = 'd' thresh = cm.max() / 2. for i in range(cm.shape[0]): for j in range(cm.shape[1]): ax1.text(j, i, format(cm[i, j], fmt), ha="center", va="center", color="white" if cm[i, j] > thresh else "black") fig.tight_layout() plt.xlim(-0.5, len(classes)-0.5) plt.ylim(len(classes)-0.5, -0.5) cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] im = ax2.imshow(cm, interpolation='nearest', cmap=cmap) ax2.figure.colorbar(im, ax=ax2) # We want to show all ticks... ax2.set(xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]), # ... and label them with the respective list entries xticklabels=classes, yticklabels=classes, title='Confusion Matrix Normalized', ylabel='True label', xlabel='Predicted label') # Rotate the tick labels and set their alignment. plt.setp(ax2.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor") # Loop over data dimensions and create text annotations. fmt = '.2f' thresh = cm.max() / 2. for i in range(cm.shape[0]): for j in range(cm.shape[1]): ax2.text(j, i, format(cm[i, j], fmt), ha="center", va="center", color="white" if cm[i, j] > thresh else "black") fig.tight_layout() plt.xlim(-0.5, len(classes)-0.5) plt.ylim(len(classes)-0.5, -0.5) return fig
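A minimal call of this two-panel helper on toy predictions, assuming the function above is defined in the same session; the class names are indexed through unique_labels, so the list should cover every label value that can occur:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels

y_true = [0, 0, 1, 2, 2, 2]
y_pred = [0, 1, 1, 2, 2, 0]
fig = plot_confusion_matrix(y_true, y_pred, classes=["neg", "neutral", "pos"])
fig.savefig("confusion_matrix.png")  # or plt.show()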
def geometric_mean_score( y_true, y_pred, labels=None, pos_label=1, average="multiclass", sample_weight=None, correction=0.0, ): """Compute the geometric mean. The geometric mean (G-mean) is the root of the product of class-wise sensitivity. This measure tries to maximize the accuracy on each of the classes while keeping these accuracies balanced. For binary classification G-mean is the squared root of the product of the sensitivity and specificity. For multi-class problems it is a higher root of the product of sensitivity for each class. For compatibility with other imbalance performance measures, G-mean can be calculated for each class separately on a one-vs-rest basis when ``average != 'multiclass'``. The best value is 1 and the worst value is 0. Traditionally if at least one class is unrecognized by the classifier, G-mean resolves to zero. To alleviate this property, for highly multi-class the sensitivity of unrecognized classes can be "corrected" to be a user specified value (instead of zero). This option works only if ``average == 'multiclass'``. Read more in the :ref:`User Guide <imbalanced_metrics>`. Parameters ---------- y_true : ndarray, shape (n_samples, ) Ground truth (correct) target values. y_pred : ndarray, shape (n_samples, ) Estimated targets as returned by a classifier. labels : list, optional The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. pos_label : str or int, optional (default=1) The class to report if ``average='binary'`` and the data is binary. If the data are multiclass, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. average : str or None, optional (default='multiclass') If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: ``'binary'``: Only report results for the class specified by ``pos_label``. This is applicable only if targets (``y_{true,pred}``) are binary. ``'micro'``: Calculate metrics globally by counting the total true positives, false negatives and false positives. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). This alters 'macro' to account for label imbalance; it can result in an F-score that is not between precision and recall. ``'samples'``: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from :func:`accuracy_score`). sample_weight : ndarray, shape (n_samples, ) Sample weights. correction: float, optional (default=0.0) Substitutes sensitivity of unrecognized classes from zero to a given value. Returns ------- geometric_mean : float Notes ----- See :ref:`sphx_glr_auto_examples_evaluation_plot_metrics.py`. References ---------- .. [1] Kubat, M. and Matwin, S. "Addressing the curse of imbalanced training sets: one-sided selection" ICML (1997) .. [2] Barandela, R., Sánchez, J. S., Garcıa, V., & Rangel, E. "Strategies for learning in class imbalance problems", Pattern Recognition, 36(3), (2003), pp 849-851. 
Examples -------- >>> from imblearn.metrics import geometric_mean_score >>> y_true = [0, 1, 2, 0, 1, 2] >>> y_pred = [0, 2, 1, 0, 0, 1] >>> geometric_mean_score(y_true, y_pred) 0.0 >>> geometric_mean_score(y_true, y_pred, correction=0.001) 0.010000000000000004 >>> geometric_mean_score(y_true, y_pred, average='macro') 0.47140452079103168 >>> geometric_mean_score(y_true, y_pred, average='micro') 0.47140452079103168 >>> geometric_mean_score(y_true, y_pred, average='weighted') 0.47140452079103168 >>> geometric_mean_score(y_true, y_pred, average=None) array([ 0.8660254, 0. , 0. ]) """ if average is None or average != "multiclass": sen, spe, _ = sensitivity_specificity_support( y_true, y_pred, labels=labels, pos_label=pos_label, average=average, warn_for=("specificity", "specificity"), sample_weight=sample_weight, ) return np.sqrt(sen * spe) else: present_labels = unique_labels(y_true, y_pred) if labels is None: labels = present_labels n_labels = None else: n_labels = len(labels) labels = np.hstack([ labels, np.setdiff1d(present_labels, labels, assume_unique=True), ]) le = LabelEncoder() le.fit(labels) y_true = le.transform(y_true) y_pred = le.transform(y_pred) sorted_labels = le.classes_ # labels are now from 0 to len(labels) - 1 -> use bincount tp = y_true == y_pred tp_bins = y_true[tp] if sample_weight is not None: tp_bins_weights = np.asarray(sample_weight)[tp] else: tp_bins_weights = None if len(tp_bins): tp_sum = np.bincount(tp_bins, weights=tp_bins_weights, minlength=len(labels)) else: # Pathological case true_sum = tp_sum = np.zeros(len(labels)) if len(y_true): true_sum = np.bincount(y_true, weights=sample_weight, minlength=len(labels)) # Retain only selected labels indices = np.searchsorted(sorted_labels, labels[:n_labels]) tp_sum = tp_sum[indices] true_sum = true_sum[indices] with np.errstate(divide="ignore", invalid="ignore"): recall = _prf_divide(tp_sum, true_sum, "recall", "true", None, "recall") recall[recall == 0] = correction with np.errstate(divide="ignore", invalid="ignore"): gmean = sp.stats.gmean(recall) # old version of scipy return MaskedConstant instead of 0.0 if isinstance(gmean, np.ma.core.MaskedConstant): return 0.0 return gmean
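Usage mirrors the docstring: without a correction the multiclass G-mean collapses to zero as soon as one class is never recovered, which the correction floor avoids:

from imblearn.metrics import geometric_mean_score

y_true = [0, 1, 2, 0, 1, 2]
y_pred = [0, 2, 1, 0, 0, 1]
print(geometric_mean_score(y_true, y_pred))                    # 0.0, classes 1 and 2 have zero recall
print(geometric_mean_score(y_true, y_pred, correction=0.001))  # small but non-zero
print(geometric_mean_score(y_true, y_pred, average='macro'))   # one-vs-rest averaging instead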
def classification_report_imbalanced( y_true, y_pred, labels=None, target_names=None, sample_weight=None, digits=2, alpha=0.1, ): """Build a classification report based on metrics used with imbalanced dataset Specific metrics have been proposed to evaluate the classification performed on imbalanced dataset. This report compiles the state-of-the-art metrics: precision/recall/specificity, geometric mean, and index balanced accuracy of the geometric mean. Parameters ---------- y_true : ndarray, shape (n_samples, ) Ground truth (correct) target values. y_pred : ndarray, shape (n_samples, ) Estimated targets as returned by a classifier. labels : list, optional The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. target_names : list of strings, optional Optional display names matching the labels (same order). sample_weight : ndarray, shape (n_samples, ) Sample weights. digits : int, optional (default=2) Number of digits for formatting output floating point values alpha : float, optional (default=0.1) Weighting factor. Returns ------- report : string Text summary of the precision, recall, specificity, geometric mean, and index balanced accuracy. Examples -------- >>> import numpy as np >>> from imblearn.metrics import classification_report_imbalanced >>> y_true = [0, 1, 2, 2, 2] >>> y_pred = [0, 0, 2, 2, 1] # doctest : +NORMALIZE_WHITESPACE >>> target_names = ['class 0', 'class 1', \ 'class 2'] # doctest : +NORMALIZE_WHITESPACE >>> print(classification_report_imbalanced(y_true, y_pred, \ target_names=target_names)) pre rec spe f1 geo iba\ sup <BLANKLINE> class 0 0.50 1.00 0.75 0.67 0.87 0.77\ 1 class 1 0.00 0.00 0.75 0.00 0.00 0.00\ 1 class 2 1.00 0.67 1.00 0.80 0.82 0.64\ 3 <BLANKLINE> avg / total 0.70 0.60 0.90 0.61 0.66 0.54\ 5 <BLANKLINE> """ if labels is None: labels = unique_labels(y_true, y_pred) else: labels = np.asarray(labels) last_line_heading = "avg / total" if target_names is None: target_names = ["%s" % l for l in labels] name_width = max(len(cn) for cn in target_names) width = max(name_width, len(last_line_heading), digits) headers = ["pre", "rec", "spe", "f1", "geo", "iba", "sup"] fmt = "%% %ds" % width # first column: class name fmt += " " fmt += " ".join(["% 9s" for _ in headers]) fmt += "\n" headers = [""] + headers report = fmt % tuple(headers) report += "\n" # Compute the different metrics # Precision/recall/f1 precision, recall, f1, support = precision_recall_fscore_support( y_true, y_pred, labels=labels, average=None, sample_weight=sample_weight, ) # Specificity specificity = specificity_score( y_true, y_pred, labels=labels, average=None, sample_weight=sample_weight, ) # Geometric mean geo_mean = geometric_mean_score( y_true, y_pred, labels=labels, average=None, sample_weight=sample_weight, ) # Index balanced accuracy iba_gmean = make_index_balanced_accuracy( alpha=alpha, squared=True)(geometric_mean_score) iba = iba_gmean( y_true, y_pred, labels=labels, average=None, sample_weight=sample_weight, ) for i, label in enumerate(labels): values = [target_names[i]] for v in ( precision[i], recall[i], specificity[i], f1[i], geo_mean[i], iba[i], ): values += ["{0:0.{1}f}".format(v, digits)] values += ["{}".format(support[i])] report += fmt % tuple(values) report += "\n" # compute averages values = [last_line_heading] for v in ( 
np.average(precision, weights=support), np.average(recall, weights=support), np.average(specificity, weights=support), np.average(f1, weights=support), np.average(geo_mean, weights=support), np.average(iba, weights=support), ): values += ["{0:0.{1}f}".format(v, digits)] values += ["{}".format(np.sum(support))] report += fmt % tuple(values) return report
def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None): """ Adapt the class with the same name in scikit-learn to accept missing data in the given X and y. """ # If the ratio of data variance between dimensions is too small, it # will cause numerical errors. To address this, we artificially # boost the variance by epsilon, a small fraction of the standard # deviation of the largest dimension. epsilon = 1e-9 * np.var(X, axis=0).max() if _refit: self.classes_ = None if getattr(self, 'classes_', None) is None: self.classes_ = multiclass.unique_labels(classes) # This is the first call to partial_fit: # initialize various cumulative counters n_features = X.shape[1] n_classes = len(self.classes_) self.theta_ = np.zeros((n_classes, n_features)) self.sigma_ = np.zeros((n_classes, n_features)) self.class_count_ = np.zeros((n_classes, n_features), dtype=np.int64) self.class_prior_ = np.zeros(n_classes, dtype=np.float64) # Initialise the class prior n_classes = len(self.classes_) # Take into account the priors if self.priors is not None: priors = np.asarray(self.priors) # Check that the provide prior match the number of classes if len(priors) != n_classes: raise ValueError('Number of priors must match number of' ' classes.') # Check that the sum is 1 if priors.sum() != 1.0: raise ValueError('The sum of the priors should be 1.') # Check that the prior are non-negative if (priors < 0).any(): raise ValueError('Priors must be non-negative.') self.class_prior_ = priors else: # Initialize the priors to zeros for each class self.class_prior_ = np.zeros(len(self.classes_), dtype=np.float64) else: if X.shape[1] != self.theta_.shape[1]: msg = "Number of features %d does not match previous data %d." raise ValueError(msg % (X.shape[1], self.theta_.shape[1])) # Put epsilon back in each time self.sigma_[:, :] -= epsilon classes = self.classes_ unique_y = np.unique(y) unique_y_in_classes = naive_bayes.in1d(unique_y, classes) if not np.all(unique_y_in_classes): raise ValueError("The target label(s) %s in y do not exist in the " "initial classes %s" % (unique_y[~unique_y_in_classes], classes)) class_prior = np.zeros(len(self.classes_), dtype=np.float64) for y_i in unique_y: i = classes.searchsorted(y_i) class_prior[i] = np.sum(y == y_i) X_i = X[y == y_i] if sample_weight is not None: sw_i = sample_weight[y == y_i] N_i = sw_i.sum() else: sw_i = None N_i = np.sum(~np.isnan(X_i), axis=0) new_theta, new_sigma = update_mean_variance( self.class_count_[i], self.theta_[i], self.sigma_[i], X_i, sw_i) new_theta[np.isnan(new_theta)] = 0. new_sigma[np.isnan(new_sigma)] = 0. self.theta_[i, :] = new_theta self.sigma_[i, :] = new_sigma self.class_count_[i] += N_i self.sigma_[:, :] += epsilon # Update if only no priors is provided if self.priors is None: # Empirical prior, with sample_weight taken into account self.class_prior_ += class_prior return self
def cm(x, y): cm = confusion_matrix(x, y) cols = unique_labels(x, y) df_cm = pd.DataFrame(cm, columns=cols, index=cols) plt.figure(figsize=(10, 7)) return sns.heatmap(df_cm, annot=True, cmap='Blues', fmt='.0f')
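The helper needs pandas and seaborn on top of the sklearn imports it uses; a quick call on toy labels, assuming cm is defined in the same session:

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels

y_true = ["spam", "ham", "ham", "spam", "ham"]
y_pred = ["spam", "ham", "spam", "spam", "ham"]
ax = cm(y_true, y_pred)  # returns the seaborn heatmap Axes
plt.show()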
def test_unique_labels(): # Empty iterable assert_raises(ValueError, unique_labels) # Multiclass problem assert_array_equal(unique_labels(xrange(10)), np.arange(10)) assert_array_equal(unique_labels(np.arange(10)), np.arange(10)) assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4])) # Multilabels assert_array_equal(unique_labels([(0, 1, 2), (0,), tuple(), (2, 1)]), np.arange(3)) assert_array_equal(unique_labels([[0, 1, 2], [0], list(), [2, 1]]), np.arange(3)) assert_array_equal(unique_labels(np.array([[0, 0, 1], [1, 0, 1], [0, 0, 0]])), np.arange(3)) assert_array_equal(unique_labels(np.array([[0, 0, 1], [0, 0, 0]])), np.arange(3)) # Several arrays passed assert_array_equal(unique_labels([4, 0, 2], xrange(5)), np.arange(5)) assert_array_equal(unique_labels((0, 1, 2), (0,), (2, 1)), np.arange(3)) # Border line case with binary indicator matrix assert_raises(ValueError, unique_labels, [4, 0, 2], np.ones((5, 5))) assert_raises(ValueError, unique_labels, np.ones((5, 4)), np.ones((5, 5))) assert_array_equal(unique_labels(np.ones((4, 5)), np.ones((5, 5))), np.arange(5)) # Some tests with strings input assert_array_equal(unique_labels(["a", "b", "c"], ["d"]), ["a", "b", "c", "d"]) assert_array_equal(unique_labels([["a", "b"], ["c"]], [["d"]]), ["a", "b", "c", "d"]) # Smoke test for all supported format for format in ["binary", "multiclass", "multilabel-sequences", "multilabel-indicator"]: for y in EXAMPLES[format]: unique_labels(y) # We don't support those format at the moment for example in NON_ARRAY_LIKE_EXAMPLES: assert_raises(ValueError, unique_labels, example) for y_type in ["unknown", "continuous", 'continuous-multioutput', 'multiclass-multioutput']: for example in EXAMPLES[y_type]: assert_raises(ValueError, unique_labels, example) #Mix of multilabel-indicator and multilabel-sequences mix_multilabel_format = product(EXAMPLES["multilabel-indicator"], EXAMPLES["multilabel-sequences"]) for y_multilabel, y_multiclass in mix_multilabel_format: assert_raises(ValueError, unique_labels, y_multiclass, y_multilabel) assert_raises(ValueError, unique_labels, y_multilabel, y_multiclass) #Mix with binary or multiclass and multilabel mix_clf_format = product(EXAMPLES["multilabel-indicator"] + EXAMPLES["multilabel-sequences"], EXAMPLES["multiclass"] + EXAMPLES["binary"]) for y_multilabel, y_multiclass in mix_clf_format: assert_raises(ValueError, unique_labels, y_multiclass, y_multilabel) assert_raises(ValueError, unique_labels, y_multilabel, y_multiclass) # Mix string and number input type assert_raises(ValueError, unique_labels, [[1, 2], [3]], [["a", "d"]]) assert_raises(ValueError, unique_labels, ["1", 2]) assert_raises(ValueError, unique_labels, [["1", 2], [3]]) assert_raises(ValueError, unique_labels, [["1", "2"], [3]]) assert_array_equal(unique_labels([(2,), (0, 2,)], [(), ()]), [0, 2]) assert_array_equal(unique_labels([("2",), ("0", "2",)], [(), ()]), ["0", "2"])
def __init__(self, y, X=None, instance_indexes=None, query_type='AllLabels', saving_path=None, **kwargs): self._index_len = None # check and record parameters self._y = check_array(y, ensure_2d=False, dtype=None) ytype = type_of_target(y) if len(self._y.shape) == 2: self._target_type = 'multilabel' else: self._target_type = ytype self._index_len = len(self._y) if len(self._y.shape) == 1: self._label_space = unique_labels(self._y) elif len(self._y.shape) == 2: self._label_space = list(range(self._y.shape[1])) else: raise ValueError("Label matrix should be 1d or 2d array.") self._label_num = len(self._label_space) self._instance_flag = False if X is not None: self._instance_flag = True self._X = check_array(X, accept_sparse='csr', ensure_2d=True, order='C') n_samples = self._X.shape[0] if n_samples != self._index_len: raise ValueError("Different length of instances and labels found.") else: self._index_len = n_samples if instance_indexes is None: self._indexes = [i for i in range(self._index_len)] else: if len(instance_indexes) != self._index_len: raise ValueError("Length of given instance_indexes do not accord the data set.") self._indexes = copy.copy(instance_indexes) if check_query_type(query_type): self.query_type = query_type if self.query_type == 'Features' and not self._instance_flag: raise Exception("In feature querying, feature matrix must be given.") else: raise NotImplementedError("Query type %s is not implemented." % type) self._split = False train_idx = kwargs.pop('train_idx', None) test_idx = kwargs.pop('test_idx', None) label_idx = kwargs.pop('label_idx', None) unlabel_idx = kwargs.pop('unlabel_idx', None) if train_idx is not None and test_idx is not None and label_idx is not None and unlabel_idx is not None: if not (len(train_idx) == len(test_idx) == len(label_idx) == len(unlabel_idx)): raise ValueError("train_idx, test_idx, label_idx, unlabel_idx " "should have the same split count (length)") self._split = True self.train_idx = train_idx self.test_idx = test_idx self.label_idx = label_idx self.unlabel_idx = unlabel_idx self.split_count = len(train_idx) self._saving_path = saving_path self._saving_dir = None if saving_path is not None: if not isinstance(self._saving_path, str): raise TypeError("A string is expected, but received: %s" % str(type(self._saving_path))) self._saving_path = os.path.abspath(saving_path) if os.path.isdir(self._saving_path): self._saving_dir = self._saving_path else: self._saving_dir = os.path.split(self._saving_path)[0] # if a directory, a dir and None will return. self.save()
def geometric_mean_score(y_true, y_pred, labels=None, pos_label=1, average='multiclass', sample_weight=None, correction=0.0): """Compute the geometric mean. The geometric mean (G-mean) is the root of the product of class-wise sensitivity. This measure tries to maximize the accuracy on each of the classes while keeping these accuracies balanced. For binary classification G-mean is the squared root of the product of the sensitivity and specificity. For multi-class problems it is a higher root of the product of sensitivity for each class. For compatibility with other imbalance performance measures, G-mean can be calculated for each class separately on a one-vs-rest basis when ``average != 'multiclass'``. The best value is 1 and the worst value is 0. Traditionally if at least one class is unrecognized by the classifier, G-mean resolves to zero. To alleviate this property, for highly multi-class the sensitivity of unrecognized classes can be "corrected" to be a user specified value (instead of zero). This option works only if ``average == 'multiclass'``. Read more in the :ref:`User Guide <imbalanced_metrics>`. Parameters ---------- y_true : ndarray, shape (n_samples, ) Ground truth (correct) target values. y_pred : ndarray, shape (n_samples, ) Estimated targets as returned by a classifier. labels : list, optional The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. pos_label : str or int, optional (default=1) The class to report if ``average='binary'`` and the data is binary. If the data are multiclass, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. average : str or None, optional (default='multiclass') If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: ``'binary'``: Only report results for the class specified by ``pos_label``. This is applicable only if targets (``y_{true,pred}``) are binary. ``'micro'``: Calculate metrics globally by counting the total true positives, false negatives and false positives. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). This alters 'macro' to account for label imbalance; it can result in an F-score that is not between precision and recall. ``'samples'``: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from :func:`accuracy_score`). sample_weight : ndarray, shape (n_samples, ) Sample weights. correction: float, optional (default=0.0) Substitutes sensitivity of unrecognized classes from zero to a given value. Returns ------- geometric_mean : float Notes ----- See :ref:`sphx_glr_auto_examples_evaluation_plot_metrics.py`. References ---------- .. [1] Kubat, M. and Matwin, S. "Addressing the curse of imbalanced training sets: one-sided selection" ICML (1997) .. [2] Barandela, R., Sánchez, J. S., Garcıa, V., & Rangel, E. "Strategies for learning in class imbalance problems", Pattern Recognition, 36(3), (2003), pp 849-851. 
Examples -------- >>> from imblearn.metrics import geometric_mean_score >>> y_true = [0, 1, 2, 0, 1, 2] >>> y_pred = [0, 2, 1, 0, 0, 1] >>> geometric_mean_score(y_true, y_pred) 0.0 >>> geometric_mean_score(y_true, y_pred, correction=0.001) 0.010000000000000004 >>> geometric_mean_score(y_true, y_pred, average='macro') 0.47140452079103168 >>> geometric_mean_score(y_true, y_pred, average='micro') 0.47140452079103168 >>> geometric_mean_score(y_true, y_pred, average='weighted') 0.47140452079103168 >>> geometric_mean_score(y_true, y_pred, average=None) array([ 0.8660254, 0. , 0. ]) """ if average is None or average != 'multiclass': sen, spe, _ = sensitivity_specificity_support( y_true, y_pred, labels=labels, pos_label=pos_label, average=average, warn_for=('specificity', 'specificity'), sample_weight=sample_weight) LOGGER.debug('The sensitivity and specificity are : %s - %s' % (sen, spe)) return np.sqrt(sen * spe) else: present_labels = unique_labels(y_true, y_pred) if labels is None: labels = present_labels n_labels = None else: n_labels = len(labels) labels = np.hstack([ labels, np.setdiff1d(present_labels, labels, assume_unique=True) ]) le = LabelEncoder() le.fit(labels) y_true = le.transform(y_true) y_pred = le.transform(y_pred) sorted_labels = le.classes_ # labels are now from 0 to len(labels) - 1 -> use bincount tp = y_true == y_pred tp_bins = y_true[tp] if sample_weight is not None: tp_bins_weights = np.asarray(sample_weight)[tp] else: tp_bins_weights = None if len(tp_bins): tp_sum = np.bincount( tp_bins, weights=tp_bins_weights, minlength=len(labels)) else: # Pathological case true_sum = tp_sum = np.zeros(len(labels)) if len(y_true): true_sum = np.bincount( y_true, weights=sample_weight, minlength=len(labels)) # Retain only selected labels indices = np.searchsorted(sorted_labels, labels[:n_labels]) tp_sum = tp_sum[indices] true_sum = true_sum[indices] recall = _prf_divide(tp_sum, true_sum, "recall", "true", None, "recall") recall[recall == 0] = correction gmean = sp.stats.gmean(recall) # old version of scipy return MaskedConstant instead of 0.0 if isinstance(gmean, np.ma.core.MaskedConstant): return 0.0 return gmean
def plot_conf_mat(y_true, y_pred, class_names, normalize=True, title=None, cmap=plt.cm.viridis, text=True, width=8, height=8): """ This function prints and plots the confusion matrix. In case of errors, you may need to do class_names = np.array(class_names) before calling this function. Parameters: -------------------------- target: The array of the true categories. It contains as many values as the number of samples. Each value is an integer number corresponding to a certain category. This array represents the true category of each sample. predicted: It has the same format, but it does not represent the true category, rather it represents the result of a model. class_names: Array of strings, where the first. The k-th element is the name of the k-th class normalize: (default=True) If False, it just prints the number of values in each cell. Otherwise it prints the frequencies, i.e. the sum over each row is 1 title: (default=None) Title of the figure cmap: (default=plt.cm.viridis) Color map text: (default=True) If True it prints numerical values on each cell. Otherwise it just shows the colors width: (default=8) Of the figure height: (default=8) Of the figure """ if not isinstance(class_names, (np.ndarray)): raise TypeError('class_names must be an np.array. It is instead ', type(class_names), '. Try to convert to arrays before: executing', 'class_names = np.array(class_names)') if not title: if normalize: title = 'Normalized confusion matrix' else: title = 'Confusion matrix, without normalization' # Compute confusion matrix cm = confusion_matrix(y_true, y_pred) # Only use the labels that appear in the data labels_present = unique_labels(y_true, y_pred) classes = class_names[labels_present] if normalize: cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] print("Normalized confusion matrix") else: print('Confusion matrix, without normalization') print(cm) fig, ax = plt.subplots(figsize=(width, height)) im = ax.imshow(cm, interpolation='nearest', cmap=cmap) ax.figure.colorbar(im, ax=ax) # We want to show all ticks... ax.set( xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]), # ... and label them with the respective list entries xticklabels=classes, yticklabels=classes, title=title, ylabel='True label', xlabel='Predicted label') # Rotate the tick labels and set their alignment. plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor") # Loop over data dimensions and create text annotations. if text == True: fmt = '.2f' if normalize else 'd' thresh = cm.max() / 2. for i in range(cm.shape[0]): for j in range(cm.shape[1]): ax.text(j, i, format(cm[i, j], fmt), ha="center", va="center", color="white" if cm[i, j] < thresh else "black") fig.tight_layout() return ax
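As the docstring warns, class_names must be a NumPy array so that it can be indexed by the label values present in the data; a minimal call, assuming the function above is defined in the same session:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels

y_true = [0, 2, 2, 1, 0, 2]
y_pred = [0, 2, 1, 1, 0, 2]
class_names = np.array(["low", "medium", "high"])
plot_conf_mat(y_true, y_pred, class_names, normalize=True)
plt.show()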
def classification_report( y_true, y_pred, labels=None, target_names=None, sample_weight=None, digits=2): """Build a text report showing the main classification metrics Read more in the :ref:`User Guide <classification_report>`. Parameters ---------- y_true : 1d array-like, or label indicator array / sparse matrix Ground truth (correct) target values. y_pred : 1d array-like, or label indicator array / sparse matrix Estimated targets as returned by a classifier. labels : array, shape = [n_labels] Optional list of label indices to include in the report. target_names : list of strings Optional display names matching the labels (same order). sample_weight : array-like of shape = [n_samples], optional Sample weights. digits : int Number of digits for formatting output floating point values Returns ------- report : string Text summary of the precision, recall, F1 score for each class. Examples -------- >>> from sklearn.metrics import classification_report >>> y_true = [0, 1, 2, 2, 2] >>> y_pred = [0, 0, 2, 2, 1] >>> target_names = ['class 0', 'class 1', 'class 2'] >>> print(classification_report(y_true, y_pred, target_names=target_names)) precision recall f1-score support <BLANKLINE> class 0 0.50 1.00 0.67 1 class 1 0.00 0.00 0.00 1 class 2 1.00 0.67 0.80 3 <BLANKLINE> avg / total 0.70 0.60 0.61 5 <BLANKLINE> """ import numpy as np from sklearn.metrics import precision_recall_fscore_support from sklearn.utils.multiclass import unique_labels if labels is None: labels = unique_labels(y_true, y_pred) else: labels = np.asarray(labels) last_line_heading = 'avg / total' if target_names is None: target_names = ['%s' % l for l in labels] name_width = max(len(cn) for cn in target_names) width = max(name_width, len(last_line_heading), digits) headers = ["precision", "recall", "f1-score", "support"] fmt = '%% %ds' % width # first column: class name fmt += ' ' fmt += ' '.join(['% 9s' for _ in headers]) fmt += '\n' headers = [""] + headers report = fmt % tuple(headers) report += '\n' p, r, f1, s = precision_recall_fscore_support(y_true, y_pred, labels=labels, average=None, sample_weight=sample_weight) for i, label in enumerate(labels): values = [target_names[i]] for v in (p[i], r[i], f1[i]): values += ["{0:0.{1}f}".format(v, digits)] values += ["{0}".format(s[i])] report += fmt % tuple(values) report += '\n' values = ["weighted " + last_line_heading] for v in (np.average(p, weights=s), np.average(r, weights=s), np.average(f1, weights=s)): values += ["{0:0.{1}f}".format(v, digits)] values += ['{0}'.format(np.sum(s))] report += fmt % tuple(values) p, r, f1, s = precision_recall_fscore_support(y_true, y_pred, labels=labels, average="macro", sample_weight=sample_weight) # compute averages values = ["macro " + last_line_heading] for v in (p, r, f1): values += ["{0:0.{1}f}".format(v, digits)] values += ['{0}'.format(np.sum(s))] report += fmt % tuple(values) p, r, f1, s = precision_recall_fscore_support(y_true, y_pred, labels=labels, average="micro", sample_weight=sample_weight) # compute averages values = ["micro " + last_line_heading] for v in (p, r, f1): values += ["{0:0.{1}f}".format(v, digits)] values += ['{0}'.format(np.sum(s))] report += fmt % tuple(values) return report
def fit(self, X, y, feature_names=None, sample_weight=None): """Fit the model according to the given training data. Parameters ---------- X : array-like, shape (n_samples, n_features) Training vector, where n_samples is the number of samples and n_features is the number of features. y : array-like, shape (n_samples,) Target vector relative to X. Has to follow the convention 0 for normal data, 1 for anomalies. sample_weight : array-like, shape (n_samples,) optional Array of weights that are assigned to individual samples, typically the amount in case of transactions data. Used to grow regression trees producing further rules to be tested. If not provided, then each sample is given unit weight. Returns ------- self : object Returns self. """ X, y = check_X_y(X, y) check_classification_targets(y) self.n_features_ = X.shape[1] self.classes_ = unique_labels(y) self.feature_dict_ = get_feature_dict(X.shape[1], feature_names) self.feature_placeholders = list(self.feature_dict_.keys()) self.feature_names = list(self.feature_dict_.values()) n_train = y.shape[0] w = np.ones(n_train) / n_train self.estimators_ = [] self.estimator_weights_ = [] self.estimator_errors_ = [] self.feature_names = feature_names for _ in range(self.n_estimators): # Fit a classifier with the specific weights clf = self.estimator() clf.fit(X, y, sample_weight=w) # uses w as the sampling weight! preds = clf.predict(X) # Indicator function miss = preds != y # Equivalent with 1/-1 to update weights miss2 = np.ones(miss.size) miss2[~miss] = -1 # Error err_m = np.dot(w, miss) / sum(w) if err_m < 1e-3: return self # Alpha alpha_m = 0.5 * np.log((1 - err_m) / float(err_m)) # New weights w = np.multiply(w, np.exp([float(x) * alpha_m for x in miss2])) self.estimators_.append(deepcopy(clf)) self.estimator_weights_.append(alpha_m) self.estimator_errors_.append(err_m) rules = [] for est, est_weight in zip(self.estimators_, self.estimator_weights_): est_rules_values = tree_to_rules(est, self.feature_placeholders, prediction_values=True) est_rules = list(map(lambda x: x[0], est_rules_values)) # BRS scores are difference between class 1 % and class 0 % in a node est_values = np.array(list(map(lambda x: x[1], est_rules_values))) rule_scores = (est_values[:, 1] - est_values[:, 0]) / est_values.sum(axis=1) compos_score = est_weight * rule_scores rules += [ Rule(r, args=[w]) for (r, w) in zip(est_rules, compos_score) ] self.rules_without_feature_names_ = rules self.rules_ = [ replace_feature_name(rule, self.feature_dict_) for rule in self.rules_without_feature_names_ ] self.complexity_ = self._get_complexity() return self