def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Print a confusion matrix and draw it as a heatmap with pyplot.

    Args:
        cm: 2-D NumPy array; rows are drawn on the "True label" axis and
            columns on the "Predicted label" axis.
        classes: Sequence of tick labels, one per row/column.
        normalize: If True, each row is divided by its sum before the
            matrix is printed and drawn.
        title: Title placed above the plot.
        cmap: Matplotlib colormap used for the heatmap.

    Returns:
        None. The plot is drawn through the ``plt`` state machine.
    """
    # Normalize BEFORE drawing so the heatmap colours, the colorbar and
    # the per-cell annotations all describe the same values.
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells above half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Normalized cells are rounded for readability; raw cells are passed
        # through unchanged so any numeric dtype keeps working.
        label = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, label,
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
def plot_confusion_matrix(y_true, y_pred, labels=None, title=None,
                          normalize=False, hide_zeros=False,
                          x_tick_rotation=0, ax=None, figsize=None,
                          title_fontsize="large", text_fontsize="medium"):
    """Generates confusion matrix plot for a given set of ground truth labels
    and classifier predictions.

    Args:
        y_true (array-like, shape (n_samples)):
            Ground truth (correct) target values.

        y_pred (array-like, shape (n_samples)):
            Estimated targets as returned by a classifier.

        labels (array-like, shape (n_classes), optional): List of labels to
            index the matrix. This may be used to reorder or select a subset
            of labels. If none is given, those that appear at least once in
            ``y_true`` or ``y_pred`` are used in sorted order.
            (new in v0.2.5)

        title (string, optional): Title of the generated plot. Defaults to
            "Normalized Confusion Matrix" if `normalize` is True. Else,
            defaults to "Confusion Matrix".

        normalize (bool, optional): If True, normalizes the confusion matrix
            by row before plotting. Rows that sum to zero are shown as 0
            instead of NaN. Defaults to False.

        hide_zeros (bool, optional): If True, does not plot cells containing
            a value of zero. Defaults to False.

        x_tick_rotation (int, optional): Rotates x-axis tick labels by the
            specified angle. This is useful in cases where there are numerous
            categories and the labels overlap each other.

        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to
            plot the confusion matrix. If None, the plot is drawn on a new
            set of axes.

        figsize (2-tuple, optional): Tuple denoting figure size of the plot
            e.g. (6, 6). Defaults to ``None``.

        title_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults
            to "large".

        text_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults
            to "medium".

    Returns:
        ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was
            drawn.

    Example:
        >>> import scikitplot.plotters as skplt
        >>> rf = RandomForestClassifier()
        >>> rf = rf.fit(X_train, y_train)
        >>> y_pred = rf.predict(X_test)
        >>> skplt.plot_confusion_matrix(y_test, y_pred, normalize=True)
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
        >>> plt.show()

        .. image:: _static/examples/plot_confusion_matrix.png
           :align: center
           :alt: Confusion matrix
    """
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=figsize)

    cm = confusion_matrix(y_true, y_pred, labels=labels)
    if labels is None:
        classes = unique_labels(y_true, y_pred)
    else:
        classes = np.asarray(labels)

    if normalize:
        # A label requested via ``labels`` but absent from ``y_true`` yields
        # an all-zero row; 0/0 would otherwise leave NaN cells in the plot.
        with np.errstate(divide='ignore', invalid='ignore'):
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        cm = np.around(cm, decimals=2)
        cm[np.isnan(cm)] = 0.0

    if title:
        ax.set_title(title, fontsize=title_fontsize)
    elif normalize:
        ax.set_title('Normalized Confusion Matrix', fontsize=title_fontsize)
    else:
        ax.set_title('Confusion Matrix', fontsize=title_fontsize)

    image = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    # Attach the colorbar to the figure that owns ``ax``; pyplot's "current"
    # figure may be a different one when the caller supplies ``ax``.
    ax.figure.colorbar(image, ax=ax)

    tick_marks = np.arange(len(classes))
    ax.set_xticks(tick_marks)
    ax.set_xticklabels(classes, fontsize=text_fontsize,
                       rotation=x_tick_rotation)
    ax.set_yticks(tick_marks)
    ax.set_yticklabels(classes, fontsize=text_fontsize)

    # Cells above half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        if hide_zeros and cm[i, j] == 0:
            continue
        ax.text(j, i, cm[i, j],
                horizontalalignment="center",
                verticalalignment="center",
                fontsize=text_fontsize,
                color="white" if cm[i, j] > thresh else "black")

    ax.set_ylabel('True label', fontsize=text_fontsize)
    ax.set_xlabel('Predicted label', fontsize=text_fontsize)
    # The string 'off' is truthy and would switch the grid ON; pass a bool.
    ax.grid(False)

    return ax
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """Compute, print and plot the confusion matrix of a classifier.

    Args:
        y_true: Ground-truth labels. They are used as integer indices into
            ``classes``.
        y_pred: Predicted labels, same encoding as ``y_true``.
        classes: Sequence (list, tuple or ndarray) of class names, indexed
            by label value.
        normalize: If True, each row of the matrix is divided by its sum.
        title: Plot title; a default depending on ``normalize`` is used when
            this is empty or None.
        cmap: Matplotlib colormap used for the heatmap.

    Returns:
        The matplotlib ``Axes`` the matrix was drawn on (a new figure is
        created on every call).
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    print(cm)
    # Only use the labels that appear in the data. ``np.asarray`` lets a
    # plain list/tuple of names be fancy-indexed like an ndarray.
    classes = np.asarray(classes)[unique_labels(y_true, y_pred)]
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # Show all ticks and label them with the respective class names.
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Annotate every cell; cells above half of the maximum get white text.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax
def plot_conf_mat(y_true, y_pred, class_names, normalize=True, title=None,
                  cmap=plt.cm.viridis, text=True, width=8, height=8):
    """
    Print and plot the confusion matrix.

    In case of errors, you may need to do
            class_names = np.array(class_names)
    before calling this function.

    Parameters:
    --------------------------
    y_true:
        The array of the true categories. It contains as many values as
        the number of samples. Each value is an integer number
        corresponding to a certain category.
    y_pred:
        Same format as ``y_true``, but holding the categories produced by
        a model rather than the true ones.
    class_names:
        NumPy array of strings. The k-th element is the name of the k-th
        class.
    normalize: (default=True)
        If False, the raw number of samples is shown in each cell.
        Otherwise row frequencies are shown, i.e. each row sums to 1.
    title: (default=None)
        Title of the figure. A default depending on ``normalize`` is used
        when empty or None.
    cmap: (default=plt.cm.viridis)
        Color map
    text: (default=True)
        If true, the numerical value is written in each cell. Otherwise
        only the colors are shown.
    width: (default=8)
        Of the figure
    height: (default=8)
        Of the figure

    Returns:
    --------------------------
    The matplotlib Axes the matrix was drawn on.

    Raises:
    --------------------------
    TypeError if ``class_names`` is not a ``np.ndarray``.
    """
    if not isinstance(class_names, np.ndarray):
        # Build ONE message string; passing several positional arguments to
        # the exception makes it render as a tuple.
        raise TypeError(
            'class_names must be an np.array. It is instead {}. '
            'Try to convert to arrays before: executing '
            'class_names = np.array(class_names)'.format(type(class_names)))

    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)

    # Only use the labels that appear in the data
    labels_present = unique_labels(y_true, y_pred)
    classes = class_names[labels_present]

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    fig, ax = plt.subplots(figsize=(width, height))
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # Show all ticks and label them with the respective class names.
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    if text:
        fmt = '.2f' if normalize else 'd'
        thresh = cm.max() / 2.
        for i in range(cm.shape[0]):
            for j in range(cm.shape[1]):
                # White text goes on cells BELOW half of the maximum.
                # NOTE(review): presumably chosen for the default viridis
                # map, whose low end is dark; confirm for other colormaps.
                ax.text(j, i, format(cm[i, j], fmt),
                        ha="center", va="center",
                        color="white" if cm[i, j] < thresh else "black")
    fig.tight_layout()
    return ax
def plot_confusion_matrix(y_true, y_pred, title=None, normalize=False,
                          ax=None, figsize=None, title_fontsize="large",
                          text_fontsize="medium"):
    """Generates confusion matrix plot for a given set of ground truth labels
    and classifier predictions.

    Args:
        y_true (array-like, shape (n_samples)):
            Ground truth (correct) target values.

        y_pred (array-like, shape (n_samples)):
            Estimated targets as returned by a classifier.

        title (string, optional): Title of the generated plot. Defaults to
            "Normalized Confusion Matrix" if `normalize` is True. Else,
            defaults to "Confusion Matrix".

        normalize (bool, optional): If True, normalizes the confusion matrix
            by row before plotting. Defaults to False.

        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to
            plot the confusion matrix. If None, the plot is drawn on a new
            set of axes.

        figsize (2-tuple, optional): Tuple denoting figure size of the plot
            e.g. (6, 6). Defaults to ``None``.

        title_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults
            to "large".

        text_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults
            to "medium".

    Returns:
        ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was
            drawn.

    Example:
        >>> import scikitplot.plotters as skplt
        >>> rf = RandomForestClassifier()
        >>> rf = rf.fit(X_train, y_train)
        >>> y_pred = rf.predict(X_test)
        >>> skplt.plot_confusion_matrix(y_test, y_pred, normalize=True)
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
        >>> plt.show()

        .. image:: _static/examples/plot_confusion_matrix.png
           :align: center
           :alt: Confusion matrix
    """
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=figsize)

    cm = confusion_matrix(y_true, y_pred)
    # The matrix has one row/column per label seen in EITHER array, so the
    # tick labels must be the sorted union, not just the labels in y_true.
    classes = np.union1d(y_true, y_pred)
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        cm = np.around(cm, decimals=2)

    if title:
        ax.set_title(title, fontsize=title_fontsize)
    elif normalize:
        ax.set_title('Normalized Confusion Matrix', fontsize=title_fontsize)
    else:
        ax.set_title('Confusion Matrix', fontsize=title_fontsize)

    image = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    # Attach the colorbar to the figure that owns ``ax``; pyplot's "current"
    # figure may be a different one when the caller supplies ``ax``.
    ax.figure.colorbar(image, ax=ax)

    tick_marks = np.arange(len(classes))
    ax.set_xticks(tick_marks)
    ax.set_xticklabels(classes, fontsize=text_fontsize)
    ax.set_yticks(tick_marks)
    ax.set_yticklabels(classes, fontsize=text_fontsize)

    # Cells above half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        ax.text(j, i, cm[i, j],
                horizontalalignment="center",
                verticalalignment="center",
                fontsize=text_fontsize,
                color="white" if cm[i, j] > thresh else "black")

    ax.set_ylabel('True label', fontsize=text_fontsize)
    ax.set_xlabel('Predicted label', fontsize=text_fontsize)

    return ax
def confusion_matrix(self, y_true, y_pred, title=None, classes=None):
    """
    Plot a confusion matrix annotated with counts and row percentages.

    Parameters
    ----------
    y_true : dataframe
        the true values of the features used
    y_pred : dataframe
        the predicted values from the features used
    title : string
        the title of the confusion matrix plot
    classes : sequence, optional
        tick labels for both axes; when None the default numeric ticks
        are kept

    Returns
    -------
    figure
        the confusion matrix visualization, or None when any step fails
        (the exception is printed instead of being raised)
    """
    try:
        # NOTE(review): this bare name must resolve to a module-level
        # confusion_matrix function (presumably sklearn's), not to this
        # method; confirm against the file's imports.
        cm = confusion_matrix(y_true, y_pred)
        # Row-wise share of each cell, expressed as a percentage.
        cm_norm = (cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]) * 100

        fig, ax = plt.subplots()
        im = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
        ax.figure.colorbar(im, ax=ax)

        axis_settings = dict(xticks=np.arange(cm.shape[1]),
                             yticks=np.arange(cm.shape[0]),
                             ylabel='True Label',
                             xlabel='Predicted Label')
        # Identity test: ``classes == None`` is elementwise for ndarrays and
        # its truth value is ambiguous.
        if classes is not None:
            axis_settings.update(xticklabels=classes, yticklabels=classes)
        ax.set(**axis_settings)

        # Cells above half of the maximum count get white text.
        thresh = cm.max() / 2
        for x in range(cm_norm.shape[0]):
            for y in range(cm_norm.shape[1]):
                # Diagonal cells are drawn with an explicit font size of 12;
                # all others use the default size.
                extra = {'fontsize': 12} if x == y else {}
                ax.text(y, x, f"{cm[x,y]}({cm_norm[x,y]:.2f}%)",
                        ha="center", va="center",
                        color="white" if cm[x, y] > thresh else "black",
                        **extra)

        plt.title(title)
        plt.subplots_adjust(left=0)
        return fig
    except Exception as e:
        # Best-effort plotting: report the problem and return None.
        print(e)
def plot_confusion_matrix(Y_true, Y_pred, target_names,
                          title='Confusion matrix', cmap=None,
                          normalize=False, figsize=(5, 5)):
    """
    Given the true (Y_true) and the predicted (Y_pred) labels, compute the
    confusion matrix, draw it in a new figure and show it.

    :param np.array Y_true: the true labels of the data. (no one hot encoding).
    :param np.array Y_pred: the predicted labels of the data by the model.
        (no one hot encoding).
    :param list target_names: given classification classes such as [0, 1, 2]
        the class names, for example: ['high', 'medium', 'low']. Pass None
        to keep the default numeric ticks.
    :param str title: the text to display at the top of the matrix.
    :param str cmap: the gradient of the values displayed from
        matplotlib.pyplot.cm, see
        http://matplotlib.org/examples/color/colormaps_reference.html
        e.g. plt.get_cmap('jet') or plt.cm.Blues. Defaults to 'Blues'.
    :param bool normalize: if False, plot the raw numbers, if True, plot the
        row-wise proportions.
    :param tuple figsize: size of the new figure.
    :reference: http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
    """
    import itertools

    cm = confusion_matrix(Y_true, Y_pred)

    # Accuracy must be taken from the raw counts, before any normalization.
    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    # Normalize BEFORE drawing so the heatmap colours and colorbar describe
    # the same values as the per-cell annotations.
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.figure(figsize=figsize)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Threshold above which a cell gets white text instead of black.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cell_format = "{:0.4f}" if normalize else "{:,}"
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cell_format.format(cm[i, j]),
                 verticalalignment="center",
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(
        accuracy, misclass))
    plt.show()
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues,
                          fig=None, ax=None, cax=None):
    """Compute and plot a confusion matrix, optionally on existing axes.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        classes: Tick labels for both axes, one per matrix row/column.
        normalize: If True, each row of the matrix is divided by its sum.
        title: Plot title; a default depending on ``normalize`` is used when
            this is empty or None.
        cmap: Matplotlib colormap used for the heatmap.
        fig: Figure to draw on. Derived from ``ax`` when only ``ax`` is
            given; a new 7x4 figure is created when both are None.
        ax: Axes to draw on. When None, the current axes of ``fig`` (or of
            the newly created figure) are used.
        cax: Axes to draw the colorbar into. When None, space is taken from
            ``ax``.

    Returns:
        The matplotlib ``Axes`` the matrix was drawn on.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    # Resolve fig/ax independently so a caller may pass either or both.
    if ax is None:
        if fig is None:
            fig, ax = plt.subplots(figsize=(7, 4))
        else:
            ax = fig.gca()
    elif fig is None:
        fig = ax.figure

    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)

    # Fixed 0 / 0.5 / 1 ticks only make sense for row-normalized values; raw
    # counts fall back to matplotlib's automatic ticks.
    cbar_ticks = [0, 0.5, 1] if normalize else None
    if cax is None:
        ax.figure.colorbar(im, ax=ax, fraction=0.046, pad=0.04,
                           ticks=cbar_ticks)
    else:
        fig.colorbar(im, cax=cax, pad=0.03, ticks=cbar_ticks)

    # Show all ticks and label them with the respective class names.
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           xticklabels=classes, yticklabels=classes,
           ylabel='True label',
           xlabel='Predicted label')
    ax.set_title(title, loc='center')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Annotate every cell; cells above half of the maximum get white text.
    fmt = '.3f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            if cm[i, j] == 0.:
                label = str(0)
            else:
                label = format(cm[i, j], fmt)
            # Collapse values that round to nothing, or to exactly one,
            # into a compact "0" / "1".
            if float(label) < 1e-3:
                label = str(0)
            if float(label) == 1.:
                label = str(1)
            ax.text(j, i, label, fontsize=6,
                    ha="center", va="center",
                    color="w" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax
print("Normalized confusion matrix") else: print('Confusion matrix, without normalization') print(cm) plt.imshow(cm, interpolation='nearest', cmap=cmap) plt.title(title) cb = plt.colorbar() cb.ax.tick_params(labelsize=20) tick_marks = np.arange(len(classes)) plt.xticks(tick_marks, classes, rotation=45)#,fontsize=22, weight='bold') plt.yticks(tick_marks, classes)#,fontsize=22, weight='bold') fmt = '.2f' if normalize else 'd' thresh = cm.max() / 2. for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center", color="white" if cm[i, j] > thresh else "black",fontsize=22, weight='bold') plt.tight_layout() plt.ylabel('True label') plt.xlabel('Predicted label') %%time # Test and evaluate pred = clf.predict(X_test_scaled)
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues,
                          fontsize=16):
    """
    Draw the confusion matrix of ``y_true`` vs ``y_pred`` on the current axes.

    Row normalization can be applied by setting `normalize=True`.

    Note: label values are used as indices into ``classes``, so classes
    are assumed to be numbered starting from zero.

    Returns the axes the matrix was drawn on (``plt.gca()``).
    """
    # None and the empty string both select the automatic title.
    if title is None:
        title = ""
    use_default_title = isinstance(title, str) and title == ""
    if use_default_title:
        title = ('Normalized confusion matrix' if normalize
                 else 'Confusion matrix, without normalization')

    matrix = confusion_matrix(y_true, y_pred)
    # Keep only the names of the labels that actually occur in the data.
    tick_names = [classes[label] for label in unique_labels(y_true, y_pred)]
    if normalize:
        row_totals = matrix.sum(axis=1)[:, np.newaxis]
        matrix = matrix.astype('float') / row_totals

    ax = plt.gca()
    ax.imshow(matrix, interpolation='nearest', cmap=cmap)

    # One tick per matrix column / row.
    n_rows, n_cols = matrix.shape[0], matrix.shape[1]
    ax.set_xticks(np.arange(n_cols))
    ax.set_yticks(np.arange(n_rows))

    ax.set_title(title, fontsize=fontsize)
    ax.set_xlabel('Predicted label', fontsize=fontsize)
    ax.set_xticklabels(tick_names, fontsize=fontsize)
    ax.set_ylabel('True label', fontsize=fontsize)
    ax.set_yticklabels(tick_names, fontsize=fontsize)

    # Write each value into its cell; white text above half of the maximum.
    cell_fmt = '.2f' if normalize else 'd'
    half_max = matrix.max() / 2.
    for row, col in np.ndindex(n_rows, n_cols):
        value = matrix[row, col]
        if value > half_max:
            text_color = "white"
        else:
            text_color = "black"
        ax.text(col, row, format(value, cell_fmt),
                ha="center", va="center", fontsize=fontsize,
                color=text_color)
    return ax
def plot_confusion_matrix(cm, classes, path, encoder_model,
                          normalize=True,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Plot a confusion matrix and save it as ``<path>/build/confusion_matrix.pdf``.

    Normalization can be applied by setting `normalize=True`.
    Adapted from the scikit-learn example.

    :param cm: confusion matrix generated by sklearn
    :param classes: encoded class ids (e.g. a range with the number of
        classes); they are decoded to names with the label encoder
    :param path: base directory; the plot is written to its ``build``
        sub-directory, which is created if missing
    :param encoder_model: file name of the saved encoder classes inside the
        ``labels`` directory two levels above this file's directory
    :param normalize: if True, each row is divided by its sum
    :param title: title of the plot
    :param cmap: matplotlib colormap used for the heatmap
    """
    print("plot confusion matrix")

    # 120 classes are treated as a special case: smaller font, larger title
    # and no per-cell annotations.
    many_classes = len(classes) == 120

    build_dir = os.path.join(path, 'build')
    os.makedirs(build_dir, exist_ok=True)

    # Decode the class names
    path_to_labels = os.path.join(
        Path(os.path.abspath(__file__)).parents[2], "labels/")
    encoder_path = os.path.join(path_to_labels, encoder_model)
    encoder = LabelEncoder()
    # NOTE(review): np.load refuses object arrays unless allow_pickle=True;
    # presumably the classes were saved as a plain string array. Confirm.
    encoder.classes_ = np.load(encoder_path)
    classes = encoder.inverse_transform(classes)
    classes = [cl.replace('_', ' ') for cl in classes]

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    # Scope the font-size override to this plot. rc_context restores the
    # caller's rcParams afterwards (even on error) instead of resetting
    # everything to matplotlib's defaults.
    with mpl.rc_context({'font.size': 3 if many_classes else 5}):
        fig = plt.figure(figsize=(5.8, 3.58))
        try:
            plt.imshow(cm, interpolation='nearest', cmap=cmap)
            if many_classes:
                plt.title(title, fontsize=12)
            else:
                plt.title(title)
            plt.colorbar()
            tick_marks = np.arange(len(classes))
            plt.xticks(tick_marks, classes, rotation=45,
                       horizontalalignment='right')
            plt.yticks(tick_marks, classes)

            if not many_classes:
                # Annotate every cell; white text above half of the maximum.
                fmt = '.2f' if normalize else 'd'
                thresh = cm.max() / 2.
                for i, j in itertools.product(range(cm.shape[0]),
                                              range(cm.shape[1])):
                    plt.text(j, i, format(cm[i, j], fmt),
                             horizontalalignment="center",
                             color="white" if cm[i, j] > thresh else "black")

            plt.tight_layout()
            plt.ylabel('True label')
            plt.xlabel('Predicted label')
            plt.savefig(os.path.join(build_dir, 'confusion_matrix.pdf'),
                        dpi=500, pad_inches=0, bbox_inches='tight')
        finally:
            # Close (not just clear) the figure so repeated calls do not
            # accumulate open figures.
            plt.close(fig)
def plt_confusion_matrix(cm, title='', classes=None, x_label=None,
                         y_label=None, figsize=(14, 12), fontsize=35,
                         cmap=plt.cm.Blues):
    '''Plot a confusion matrix with raw counts and row-normalized ratios.

    Each cell shows the count and, below it in parentheses, the count
    divided by its row sum. The heatmap colours follow the ratios on a
    fixed 0..1 scale. The overall accuracy (in percent) is printed and
    also written onto the plot.

    Args:
        cm (numpy.ndarray): a confusion matrix of integer counts.
        title (str): title of the plot.
        classes (list): class names used for both axes. Overrides
            ``x_label`` and ``y_label`` when not None.
        x_label (list): tick labels of the x axis.
        y_label (list): tick labels of the y axis.
        figsize (tuple): size of the new figure.
        fontsize (int): font size of the per-cell annotations.
        cmap: matplotlib colormap used for the heatmap.

    Returns:
        fig (matplotlib.figure.Figure)
    '''
    def _given(labels):
        # Length-based check instead of truthiness, so multi-element NumPy
        # arrays do not raise "truth value is ambiguous".
        return labels is not None and len(labels) > 0

    # Fall back to 0..n-1 when no labels of any kind were supplied.
    if not (_given(classes) or _given(x_label) or _given(y_label)):
        classes = range(len(cm))

    if classes is not None:
        x_label = classes
        y_label = classes

    cm2 = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    accuracy_text = "ACC:" + format(
        sum(cm.diagonal()) / np.sum(cm) * 100, '.2f')
    print(accuracy_text)

    fig = plt.figure(figsize=figsize, facecolor='w')
    plt.imshow(cm2, interpolation='nearest', cmap=cmap, vmin=0, vmax=1)
    plt.title(title, fontsize=20)
    plt.colorbar()
    tick_marks = np.arange(len(x_label))
    plt.xticks(tick_marks, x_label, fontsize=20, color='#0b6008')
    plt.yticks(tick_marks, y_label, fontsize=20, color='#0b6008')

    fmt = 'd'
    thresh2 = .5
    fmt2 = '.2f'
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # NaN ratios (rows that sum to zero) are shown as 0.00.
        text = '0.00' if np.isnan(cm[i, j]) else format(cm[i, j], fmt)
        text2 = '0.00' if np.isnan(cm2[i, j]) else format(cm2[i, j], fmt2)
        # White on cells whose ratio exceeds 0.5, red otherwise.
        color = "white" if cm2[i, j] > thresh2 else "red"
        plt.text(j, i - 0.1, text,
                 horizontalalignment="center",
                 color=color, fontsize=fontsize)
        plt.text(j, i + 0.1, '(' + text2 + ')',
                 horizontalalignment="center",
                 color=color, fontsize=fontsize)

    plt.ylabel('True label', fontsize=20, color='#ca7b62')
    plt.xlabel('Predicted label', fontsize=20, color='#ca7b62')
    plt.text(0, -0.55, accuracy_text, horizontalalignment='center')
    return fig