def float_norm(mat):
    """
    Normalizes input between 0 and 1

    Args:
        mat: Iterable

    Returns:
        Normalized input
    """
    from utils_base import float2str
    # mat = np.log(mat ** 2)
    valid_idx = ~np.isnan(mat) & ~np.isinf(mat)
    mat = mat - mat[valid_idx].min()
    max_scale = np.abs(mat[valid_idx]).max()
    if not max_scale == 0:
        mat = mat / max_scale
    mat[np.isnan(mat)] = 0
    mat[np.isinf(mat)] = 0
    if np.any(mat > 1):
        raise AssertionError('Unexpected condition at high bound ' +
                             float2str(mat.max()))
    if np.any(mat < 0):
        raise AssertionError('Unexpected condition at low bound ' +
                             float2str(mat.min()))
    return mat
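
# Illustrative sketch (not part of the original module): how float_norm maps an
# array containing NaN/inf entries onto [0, 1]. The data below are hypothetical.
def _example_float_norm():
    import numpy as np
    data = np.array([2.0, np.nan, 5.0, np.inf, 11.0])
    normed = float_norm(data)
    # NaN/inf entries are zeroed, finite entries are scaled to span [0, 1]
    print(normed)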
def cm_analysis(y_true, y_pred, labels, ymap=None, figsize=(10, 10)):
    """
    From: https://gist.github.com/hitvoice/36cf44689065ca9b927431546381a3f7
    Github: Runqi Yang @hitvoice

    Generate a matrix plot of the confusion matrix with pretty annotations
    and return the figure.

    Args:
        y_true: true label of the data, with shape (nsamples,)
        y_pred: prediction of the data, with shape (nsamples,)
        labels: string array, name the order of class labels in the confusion
            matrix. use `clf.classes_` if using scikit-learn models.
            with shape (nclass,).
        ymap: dict: any -> string, length == nclass. if not None, map the
            labels & ys to more understandable strings. Caution: original
            y_true, y_pred and labels must align.
        figsize: the size of the figure plotted.

    Returns:
        The matplotlib figure
    """
    from utils_base import float2str
    if ymap is not None:
        y_pred = [ymap[yi] for yi in y_pred]
        y_true = [ymap[yi] for yi in y_true]
        labels = [ymap[yi] for yi in labels]
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    cm_sum = np.sum(cm, axis=1, keepdims=True)
    cm_perc = cm / cm_sum.astype(float) * 100
    annot = np.empty_like(cm).astype(str)
    nrows, ncols = cm.shape
    for i in range(nrows):
        for j in range(ncols):
            c = cm[i, j]
            p = cm_perc[i, j]
            if c == 0:
                annot[i, j] = ''
            else:
                annot[i, j] = float2str(p, 1) + '\\%\n' + str(c)
    cm = pd.DataFrame(cm, index=labels, columns=labels)
    cm.index.name = 'Actual'
    cm.columns.name = 'Predicted'
    fig, ax = plt.subplots(figsize=figsize)
    sns.heatmap(cm, annot=annot, fmt='', ax=ax, cbar=False, cmap='Blues',
                annot_kws={'family': 'serif', 'size': font_size})
    return fig
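
# Illustrative sketch (not part of the original module): a typical call with a
# fitted scikit-learn classifier. `clf`, `X_test`, `y_test` and the output
# filename are hypothetical placeholders.
def _example_cm_analysis(clf, X_test, y_test):
    y_pred = clf.predict(X_test)
    fig = cm_analysis(y_test, y_pred, labels=clf.classes_)
    # The figure is returned rather than saved, so persist it explicitly
    fig.savefig('confusion_matrix.pdf', bbox_inches='tight')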
def show_classification_results(all_preds, y, ids, class_names, fold=None,
                                mark_wrongs=False):
    """
    Prints the results of the classification predictions in a way which allows
    for a comparison between the predictions and the true classes.

    Args:
        all_preds: A matrix of [ N_samples x N_classes ], with 1's on the
            predicted class
        y: A matrix of [ N_samples x N_classes ], with 1's on the true class
        ids: The id (a string) of each sample
        class_names: The unique classes in the classification problem (as
            strings)
        fold: The fold in which each sample belongs to (fold of cross
            validation)
        mark_wrongs: Put a marker next to misclassified samples

    Returns:
        Nothing
    """
    from tabulate import tabulate
    from utils_base import float2str
    accuracy = np.sum(np.all(all_preds == y, axis=1)) / float(y.shape[0])
    n_hots = np.sum(all_preds, axis=1)
    if not np.all(n_hots == 1):
        too_hot = np.where(~(n_hots == 1))[0]
        raise AssertionError(
            'Predictions do not make sense because the following idxs had '
            'more than one hot ' + str(too_hot) +
            ' with the following hots ' + str(n_hots[too_hot]))
    n_hots = np.sum(y, axis=1)
    if not np.all(n_hots == 1):
        too_hot = np.where(~(n_hots == 1))[0]
        raise AssertionError(
            'Ground truths do not make sense because the following idxs had '
            'more than one hot ' + str(too_hot) +
            ' with the following hots ' + str(n_hots[too_hot]))
    results = np.concatenate(
        (np.atleast_2d(ids).T,
         np.atleast_2d(class_names[np.argmax(all_preds, axis=1)]).T),
        axis=1)
    headers = ('AIR', 'Prediction')
    if fold is not None:
        results = np.concatenate((results, np.atleast_2d(fold).T), axis=1)
        headers = tuple(list(headers) + ['Fold'])
    if mark_wrongs:
        correct = np.array([
            i.replace('EE_lobby', 'EE-lobby').split('_')[1]
            for i in results[:, 0]
        ]) == results[:, 1]
        results = results[~correct, :]
        print('Showing ' + str(np.sum(~correct)) + ' wrongs of ' +
              str(correct.size))
    print(tabulate(results, headers=headers))
    print('Overall Accuracy: ' + float2str(accuracy, 3))
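
# Illustrative sketch (not part of the original module): tabulating one-hot
# predictions against one-hot ground truths. All inputs here are hypothetical.
def _example_show_classification_results():
    import numpy as np
    class_names = np.array(['office', 'lobby'])
    y = np.array([[1, 0], [0, 1], [1, 0]])           # true classes
    all_preds = np.array([[1, 0], [0, 1], [0, 1]])   # last sample misclassified
    ids = np.array(['air_office_1', 'air_lobby_1', 'air_office_2'])
    show_classification_results(all_preds, y, ids, class_names)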
def get_scaler_descaler(x, verbose=False):
    """
    Creates scaling and descaling functions for preparation of training data
    and reconstruction from DNNs

    Args:
        x: Input data
        verbose: Verbose reporting

    Returns:
        Scaler function object
        Descaler function object
    """
    import numpy as np
    from utils_base import float2str
    if x.ndim > 2:
        conced_x = np.concatenate(x, axis=0)
    else:
        conced_x = np.array(x)
    subval = np.min(conced_x, axis=0)
    scale_val = np.max(conced_x, axis=0) - subval
    # Avoid division by zero for constant features
    scale_val[scale_val == 0] = 1
    subval.shape = tuple([1, 1] + list(subval.shape))
    scale_val.shape = tuple([1, 1] + list(scale_val.shape))
    if verbose:
        print('Will construct scaler with subs: ' + float2str(subval) + '\n' +
              '... and scalers ' + float2str(scale_val))

    def scaler(y):
        return (y - subval) / scale_val

    def descaler(y):
        return y * scale_val + subval

    return scaler, descaler
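
# Illustrative sketch (not part of the original module): round-tripping a batch
# of feature matrices through the scaler/descaler pair. The data are hypothetical.
def _example_get_scaler_descaler():
    import numpy as np
    x = np.random.rand(4, 10, 3)  # [batch x time x features]
    scaler, descaler = get_scaler_descaler(x)
    x_scaled = scaler(x)
    # Descaling should recover the original data up to floating point error
    assert np.allclose(descaler(x_scaled), x)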
def accuracy_eval(x, y, cmodel, prefix=None):
    """
    Accepts input data, labels and a model to predict the labels, which are
    then evaluated in terms of their accuracy.

    Can be combined with PostEpochWorker, to provide an evaluation of the
    accuracy in a custom way during the training of DNNs as:
        PostEpochWorker(
            (x_out_train[val_idxs, :, :], x_out_test[test_idxs, :, :]),
            (y_train[val_idxs, :], y_test[test_idxs, :]),
            model_filename,
            eval_fun=(
                lambda x, y, cmodel: accuracy_eval(x, y, cmodel, prefix='Val'),
                lambda x, y, cmodel: accuracy_eval(x, y, cmodel, prefix='Test')),
            eval_every_n_epochs=100)

    Args:
        x: Input data
        y: Labels
        cmodel: Trained model for inference
        prefix: Prefix for the reporting

    Returns:
        The predictions
    """
    from utils_base import float2str
    y_pred = np.argmax(cmodel.predict(x), axis=1).flatten()
    acc = np.sum(y_pred == np.argmax(y, axis=1)) / float(x.shape[0])
    print(((prefix + ' ') if prefix is not None else '') +
          'Accuracy: ' + float2str(acc, 4))
    y_pred_out = np.zeros_like(y)
    for i in range(y_pred_out.shape[0]):
        y_pred_out[i, y_pred[i]] = True
    n_hots = np.sum(y_pred_out, axis=1)
    if not np.all(n_hots == 1):
        too_hot = np.where(~(n_hots == 1))[0]
        raise AssertionError(
            'Predictions do not make sense because the following idxs had '
            'more than one hot ' + str(too_hot) +
            ' with the following hots ' + str(n_hots[too_hot]))
    return y_pred_out
def on_epoch_end(self, epoch, logs={}):
    from utils_base import float2str
    cval_loss = logs.get('val_loss')
    loss_name = 'val_loss'
    if cval_loss is None:
        loss_name = 'loss'
        cval_loss = logs.get('loss')
    if self.best_val_loss is None or cval_loss <= self.best_val_loss:
        self.update_since_last = True
        self.best_val_loss = cval_loss
        self.best_val_model = self.model
        if self.save_best:
            try:
                self.model.save(self.model_filename)
            except TypeError:
                print('Could not save model ' + self.model_filename)
        print('At epoch : ' + str(epoch) + ' found new best ' + loss_name +
              ' model with ' + loss_name + ' ' +
              float2str(self.best_val_loss, 12) + ' ' + '\r', end='')
    self.run_eval(epoch)