# Shared imports assumed by the snippets below
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix


def float_norm(mat):
    """
    Normalizes input between 0 and 1

    Args:
        mat: Iterable

    Returns: Normalized input

    """
    from utils_base import float2str

    # Scale using only the finite entries, so NaN/Inf do not skew the range
    valid_idx = np.isfinite(mat)
    mat = mat - mat[valid_idx].min()
    max_scale = np.abs(mat[valid_idx]).max()
    if max_scale != 0:
        mat = mat / max_scale
    # Map the invalid entries to 0 in the output
    mat[~np.isfinite(mat)] = 0
    if np.any(mat > 1):
        raise AssertionError('Unexpected condition at high bound ' +
                             float2str(mat.max()))
    if np.any(mat < 0):
        raise AssertionError('Unexpected condition at low bound ' +
                             float2str(mat.min()))
    return mat
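

def _demo_float_norm():
    # Minimal usage sketch for float_norm; the input values are illustrative
    # only.
    data = np.array([3.0, np.nan, 7.0, np.inf, 5.0])
    normed = float_norm(data)
    # Finite entries are scaled to [0, 1]; NaN/Inf entries become 0
    print(normed)  # [0.  0.  1.  0.  0.5]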
def cm_analysis(y_true, y_pred, labels, ymap=None, figsize=(10, 10),
                font_size=16):
    """

    ## From : https://gist.github.com/hitvoice/36cf44689065ca9b927431546381a3f7
    ## Github: Runqi Yang @hitvoice

    Generate a matrix plot of the confusion matrix with pretty annotations and
    return the figure to the caller.
    args:
      y_true:    true label of the data, with shape (nsamples,)
      y_pred:    prediction of the data, with shape (nsamples,)
      labels:    string array, name the order of class labels in the confusion matrix.
                 use `clf.classes_` if using scikit-learn models.
                 with shape (nclass,).
      ymap:      dict: any -> string, length == nclass.
                 if not None, map the labels & ys to more understandable strings.
                 Caution: original y_true, y_pred and labels must align.
      figsize:   the size of the figure plotted.
      font_size: font size of the cell annotations (default chosen arbitrarily).
    """

    from utils_base import float2str

    if ymap is not None:
        y_pred = [ymap[yi] for yi in y_pred]
        y_true = [ymap[yi] for yi in y_true]
        labels = [ymap[yi] for yi in labels]
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    cm_sum = np.sum(cm, axis=1, keepdims=True)
    cm_perc = cm / cm_sum.astype(float) * 100
    annot = np.empty_like(cm).astype(str)
    nrows, ncols = cm.shape
    for i in range(nrows):
        for j in range(ncols):
            c = cm[i, j]
            p = cm_perc[i, j]
            if c == 0:
                annot[i, j] = ''
            else:
                annot[i, j] = float2str(p, 1) + '%\n' + str(c)
    cm = pd.DataFrame(cm, index=labels, columns=labels)
    cm.index.name = 'Actual'
    cm.columns.name = 'Predicted'
    fig, ax = plt.subplots(figsize=figsize)
    sns.heatmap(cm,
                annot=annot,
                fmt='',
                ax=ax,
                cbar=False,
                cmap='Blues',
                annot_kws={
                    'family': 'serif',
                    'size': font_size
                })
    return fig
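

def _demo_cm_analysis():
    # Minimal usage sketch for cm_analysis; the labels and predictions are
    # illustrative only.
    y_true = ['cat', 'dog', 'dog', 'cat', 'bird']
    y_pred = ['cat', 'dog', 'cat', 'cat', 'bird']
    fig = cm_analysis(y_true, y_pred, labels=['bird', 'cat', 'dog'])
    fig.savefig('confusion_matrix.png')  # saving is left to the caller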
def show_classification_results(all_preds,
                                y,
                                ids,
                                class_names,
                                fold=None,
                                mark_wrongs=False):
    """
    Prints the results of the classification predictions in a way which allows for a comparison
    between the predictions and the true classes.

    Args:
        all_preds: A matrix of [ N_samples x N_classes ], with 1's on the predicted class
        y: A matrix of [ N_samples x N_classes ], with 1's on the true class
        ids: The id (a string) of each sample
        class_names: The unique classes in the classification problem (as strings)
        fold: The fold to which each sample belongs (fold of cross-validation)
        mark_wrongs: If True, show only the misclassified samples

    Returns:
        Nothing

    """
    from tabulate import tabulate
    from utils_base import float2str
    accuracy = np.sum(np.all(all_preds == y, axis=1)) / float(y.shape[0])
    n_hots = np.sum(all_preds, axis=1)
    if not np.all(n_hots == 1):
        too_hot = np.where(n_hots != 1)[0]
        raise AssertionError(
            'Predictions do not make sense because the following idxs did not '
            'have exactly one hot value: ' + str(too_hot) +
            ', with hot counts ' + str(n_hots[too_hot]))
    n_hots = np.sum(y, axis=1)
    if not np.all(n_hots == 1):
        too_hot = np.where(n_hots != 1)[0]
        raise AssertionError(
            'Ground truths do not make sense because the following idxs did not '
            'have exactly one hot value: ' + str(too_hot) +
            ', with hot counts ' + str(n_hots[too_hot]))
    results = np.concatenate(
        (np.atleast_2d(ids).T,
         np.atleast_2d(class_names[np.argmax(all_preds, axis=1)]).T),
        axis=1)
    headers = ('AIR', 'Prediction')
    if fold is not None:
        results = np.concatenate((results, np.atleast_2d(fold).T), axis=1)
        headers = tuple(list(headers) + ['Fold'])
    if mark_wrongs:
        # The true class is encoded in each sample id; recover it and compare
        # it with the prediction column
        correct = np.array([
            i.replace('EE_lobby', 'EE-lobby').split('_')[1]
            for i in results[:, 0]
        ]) == results[:, 1]
        results = results[~correct, :]
        print('Showing ' + str(np.sum(~correct)) + ' wrongs of ' +
              str(correct.size))
    print(tabulate(results, headers=headers))

    print('Overall Accuracy: ' + float2str(accuracy, 3))
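

def _demo_show_classification_results():
    # Minimal usage sketch for show_classification_results; the one-hot
    # matrices and sample ids are illustrative only.
    class_names = np.array(['office', 'lobby'])
    y = np.array([[1, 0], [0, 1], [1, 0]])          # true classes, one-hot
    all_preds = np.array([[1, 0], [1, 0], [1, 0]])  # predictions, one-hot
    ids = np.array(['s1', 's2', 's3'])
    show_classification_results(all_preds, y, ids, class_names)

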
def get_scaler_descaler(x, verbose=False):
    """
    Creates scaling and descaling functions for preparation of training data and reconstruction
    from DNNs

    Args:
        x: Input data
        verbose: Verbose reporting

    Returns:
        Scaler function object
        Descaler function object

    """
    import numpy as np
    from utils_base import float2str
    if x.ndim > 2:
        conced_x = np.concatenate(x, axis=0)
    else:
        conced_x = np.array(x)
    subval = np.min(conced_x, axis=0)
    scale_val = np.max(conced_x, axis=0) - subval
    scale_val[scale_val == 0] = 1

    # Add leading singleton dimensions so the values broadcast over
    # [batch x time x features] inputs
    subval = subval.reshape((1, 1) + subval.shape)
    scale_val = scale_val.reshape((1, 1) + scale_val.shape)

    if verbose:
        print('Will construct scaler with subs: ' + float2str(
            subval) + "\n" + '... and scalers ' + float2str(
            scale_val))

    def scaler(y):
        return (y - subval) / scale_val

    def descaler(y):
        return y * scale_val + subval

    return scaler, descaler
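

def _demo_get_scaler_descaler():
    # Minimal usage sketch for get_scaler_descaler; data is shaped as
    # [batch x time x features], with values chosen for illustration.
    x = np.arange(24, dtype=float).reshape(2, 3, 4)
    scaler, descaler = get_scaler_descaler(x)
    scaled = scaler(x)           # each feature is mapped to [0, 1]
    restored = descaler(scaled)  # the round trip recovers the input
    assert np.allclose(restored, x)

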
def accuracy_eval(x, y, cmodel, prefix=None):
    """
    Accepts input data, labels and a model to predict the labels, which are then evaluated in
    terms of their accuracy.

    Can be combined with PostEpochWorker, to provide an evaluation of the accuracy in a custom
    way during the training of DNNs as:

     PostEpochWorker(
                (x_out_train[val_idxs, :, :],
                 x_out_test[test_idxs, :, :]),
                (y_train[val_idxs, :],
                y_test[test_idxs, :]),
                model_filename,
                eval_fun=(
                    lambda x, y, cmodel: accuracy_eval(x, y, cmodel, prefix='Val'),
                    lambda x, y, cmodel: accuracy_eval(x, y, cmodel, prefix='Test')),
                eval_every_n_epochs=100)

    Args:
        x: Input data
        y: Labels
        cmodel: Trained model for inference
        prefix: Prefix for the reporting

    Returns: The predictions

    """
    from utils_base import float2str

    y_pred = np.argmax(cmodel.predict(x), axis=1).flatten()
    acc = np.sum(y_pred == np.argmax(y, axis=1)) / float(x.shape[0])
    print(((prefix + ' ') if prefix is not None else '') + 'Accuracy: ' +
          float2str(acc, 4))
    y_pred_out = np.zeros_like(y)
    for i in range(y_pred_out.shape[0]):
        y_pred_out[i, y_pred[i]] = True
    n_hots = np.sum(y_pred_out, axis=1)
    if not np.all(n_hots == 1):
        too_hot = np.where(n_hots != 1)[0]
        raise AssertionError(
            'Predictions do not make sense because the following idxs did not '
            'have exactly one hot value: ' + str(too_hot) +
            ', with hot counts ' + str(n_hots[too_hot]))
    return y_pred_out
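

def _demo_accuracy_eval():
    # Minimal usage sketch for accuracy_eval, using a stub model whose
    # predict() always favors class 0; any Keras-style model works here.
    class _StubModel(object):
        def predict(self, x):
            return np.tile([0.9, 0.1], (x.shape[0], 1))

    x = np.zeros((4, 2))
    y = np.array([[1, 0], [1, 0], [0, 1], [1, 0]])
    y_pred_out = accuracy_eval(x, y, _StubModel(), prefix='Demo')
    print(y_pred_out)  # one-hot predictions, all on class 0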
    def on_epoch_end(self, epoch, logs=None):
        from utils_base import float2str

        logs = logs or {}
        cval_loss = logs.get('val_loss')
        loss_name = 'val_loss'
        if cval_loss is None:
            loss_name = 'loss'
            cval_loss = logs.get('loss')
        if self.best_val_loss is None or cval_loss <= self.best_val_loss:
            self.update_since_last = True
            self.best_val_loss = cval_loss
            self.best_val_model = self.model
            if self.save_best:
                try:
                    self.model.save(self.model_filename)
                except TypeError:
                    print('Could not save model ' + self.model_filename)
            print('At epoch : ' + str(epoch) + ' found new best ' + loss_name +
                  ' model with ' + loss_name + ' ' +
                  float2str(self.best_val_loss, 12) +
                  '                         ', end='\r')

        self.run_eval(epoch)
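

def _demo_post_epoch_worker(model, x_train, y_train, x_val, y_val, x_test,
                            y_test, model_filename):
    # Sketch of wiring the callback into Keras training, assuming
    # PostEpochWorker subclasses keras.callbacks.Callback (its full class body
    # is not shown in this excerpt); mirrors the construction given in the
    # accuracy_eval docstring above.
    worker = PostEpochWorker(
        (x_val, x_test),
        (y_val, y_test),
        model_filename,
        eval_fun=(
            lambda x, y, cmodel: accuracy_eval(x, y, cmodel, prefix='Val'),
            lambda x, y, cmodel: accuracy_eval(x, y, cmodel, prefix='Test')),
        eval_every_n_epochs=100)
    model.fit(x_train, y_train, epochs=100, callbacks=[worker])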