Example #1
def log_image_dir_snapshots(image_dir, channel_name='image_dir_snapshots', experiment=None, sample=16, seed=1234):
    """Logs visual snapshot of the directory with image data to Neptune.

    For a given directory with images it logs a sample of images as figure to Neptune.
    If the `image_dir` specified contains multiple folders it will sample per folder and create
    multiple figures naming each figure with the folder name.
    See snapshots per class here https://ui.neptune.ai/jakub-czakon/examples/e/EX-95/channels.

    Args:
        image_dir(str): path to directory with images.
        sample(int): number of images that should be sampled for plotting.
        channel_name(str): name of the neptune channel. Default is 'image_dir_snapshots'.
        experiment(neptune.experiments.Experiment or None): if the data should be logged to a particular
           neptune experiment it can be passed here. By default it is logged to the current experiment.
        seed(int): random state for the sampling of images.

    Examples:
        Initialize Neptune::

            import neptune
            from neptunecontrib.versioning.data import log_image_dir_snapshots
            neptune.init('USER_NAME/PROJECT_NAME')

        Log visual snapshot of image directory::

            PATH = 'train_dir/'
            with neptune.create_experiment():
                log_image_dir_snapshots(PATH)

    """
    _exp = experiment if experiment else neptune

    figs = _get_collated_images(image_dir, sample=sample, seed=seed)
    for fig in figs:
        send_figure(fig, channel_name=channel_name, experiment=_exp)
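
The helper `_get_collated_images` used above is not included in this listing. As a rough illustration of the per-folder sampling the docstring describes, a minimal sketch (with a hypothetical name, not the library helper) could look like this:

import os
import random

def _sample_image_paths_sketch(image_dir, sample=16, seed=1234):
    # Walk the directory tree and draw a reproducible sample of image paths per folder,
    # mirroring the "sample per folder" behaviour described in the docstring above.
    rng = random.Random(seed)
    sampled = {}
    for root, _, files in os.walk(image_dir):
        images = [os.path.join(root, name) for name in files
                  if name.lower().endswith(('.png', '.jpg', '.jpeg'))]
        if images:
            sampled[os.path.basename(root)] = rng.sample(images, min(sample, len(images)))
    return sampled

Each key of the returned dict would then correspond to one figure named after its folder.
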
Example #2
def log_ks_statistic(y_true,
                     y_pred,
                     experiment=None,
                     channel_name='metric_charts',
                     prefix=''):
    """Creates and logs KS statistics curve and KS statistics score to Neptune.

    The Kolmogorov-Smirnov statistic is computed from the true positive rate (TPR) and the true
    negative rate (TNR) at each threshold, and both curves are plotted on a single chart.
    The maximum distance between TPR and TNR can be treated as a performance metric.

    Args:
        y_true (array-like, shape (n_samples)): Ground truth (correct) target values.
        y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values from 0 to 1.
        experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None.
        channel_name(str): name of the neptune channel. Default is 'metric_charts'.
        prefix(str): Prefix that will be added before metric name when logged to Neptune.

    Examples:
        Train the model and make predictions on test::

            from sklearn.datasets import make_classification
            from sklearn.ensemble import RandomForestClassifier
            from sklearn.model_selection import train_test_split
            from sklearn.metrics import classification_report

            X, y = make_classification(n_samples=2000)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

            model = RandomForestClassifier()
            model.fit(X_train, y_train)

            y_test_pred = model.predict_proba(X_test)

        Create and log KS statistics curve and KS statistics score to Neptune::

            import neptune
            from neptunecontrib.monitoring.metrics import log_ks_statistic

            neptune.init()
            with neptune.create_experiment():
                log_ks_statistic(y_test, y_test_pred)

        Check out this experiment https://ui.neptune.ai/o/neptune-ai/org/binary-classification-metrics/e/BIN-101/logs.

    """
    assert len(
        y_pred.shape
    ) == 2, 'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it'

    _exp = experiment if experiment else neptune

    res = binary_ks_curve(y_true, y_pred[:, 1])
    ks_stat = res[3]
    _exp.log_metric(prefix + 'ks_statistic', ks_stat)

    fig, ax = plt.subplots()
    plt_metrics.plot_ks_statistic(y_true, y_pred, ax=ax)
    send_figure(fig, channel_name=prefix + channel_name, experiment=_exp)
    plt.close()
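
For intuition on the `ks_statistic` value logged above (obtained here from `binary_ks_curve`), a minimal, self-contained sketch of the same quantity is shown below; `ks_statistic_sketch` is a hypothetical name, not part of neptunecontrib:

import numpy as np

def ks_statistic_sketch(y_true, y_pred_pos):
    # Sort examples by their positive-class score and build the cumulative distribution of
    # scores for positives and negatives; the KS statistic is the largest gap between them.
    order = np.argsort(y_pred_pos)
    y_sorted = np.asarray(y_true)[order]
    cum_pos = np.cumsum(y_sorted) / max(y_sorted.sum(), 1)
    cum_neg = np.cumsum(1 - y_sorted) / max((1 - y_sorted).sum(), 1)
    return float(np.max(np.abs(cum_pos - cum_neg)))
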
Example #3
def draw_concept_symbol_matrix(self):
    # Plot the current concept-symbol matrix as an annotated heatmap
    # and log it to the current Neptune experiment.
    figure, ax = plt.subplots(figsize=(20, 5))
    figure.suptitle(f'Concept-symbol matrix {self.epoch_counter}')
    g = sns.heatmap(self.concept_symbol_matrix, annot=True, fmt='.2f', ax=ax)
    g.set_title(f'Concept-symbol matrix {self.epoch_counter}')
    send_figure(figure, channel_name=self.prefix + 'concept_symbol_matrix')
    plt.close()
Example #4
def log_class_metrics_by_threshold(y_true,
                                   y_pred_pos,
                                   experiment=None,
                                   channel_name='metrics_by_threshold',
                                   prefix=''):
    """Creates metric/threshold charts for each metric and logs them to Neptune.

    Metrics for which charts are created and logged are: 'accuracy', 'precision', 'recall', 'f1_score', 'f2_score',
    'matthews_corrcoef', 'cohen_kappa', 'true_positive_rate', 'true_negative_rate', 'positive_predictive_value',
    'negative_predictive_value', 'false_positive_rate', 'false_negative_rate', 'false_discovery_rate'

    Args:
        y_true (array-like, shape (n_samples)): Ground truth (correct) target values.
        y_pred_pos (array-like, shape (n_samples)): Score predictions with values from 0 to 1.
        experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None.
        channel_name(str): name of the neptune channel. Default is 'metrics_by_threshold'.
        prefix(str): Prefix that will be added before metric name when logged to Neptune.

    Examples:
        Train the model and make predictions on test::

            from sklearn.datasets import make_classification
            from sklearn.ensemble import RandomForestClassifier
            from sklearn.model_selection import train_test_split
            from sklearn.metrics import classification_report

            X, y = make_classification(n_samples=2000)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

            model = RandomForestClassifier()
            model.fit(X_train, y_train)

            y_test_pred = model.predict_proba(X_test)

        Logs metric/threshold charts to Neptune::

            import neptune
            from neptunecontrib.monitoring.metrics import log_class_metrics_by_threshold

            neptune.init()
            with neptune.create_experiment():
                log_class_metrics_by_threshold(y_test, y_test_pred[:,1])

        Check out this experiment https://ui.neptune.ai/o/neptune-ai/org/binary-classification-metrics/e/BIN-101/logs.

    """
    assert len(
        y_pred_pos.shape
    ) == 1, 'y_pred_pos needs to be 1D prediction for positive class'

    _exp = experiment if experiment else neptune

    expect_not_a_run(_exp)

    figs = _plot_class_metrics_by_threshold(y_true, y_pred_pos)

    for fig in figs:
        send_figure(fig, channel_name=prefix + channel_name, experiment=_exp)
        plt.close()
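
The plotting helper `_plot_class_metrics_by_threshold` is not reproduced in this listing. As a rough, hypothetical sketch of the underlying computation (sweeping a threshold and evaluating a couple of the metrics listed above), assuming scikit-learn is available:

import numpy as np
from sklearn.metrics import accuracy_score, f1_score

def class_metrics_by_threshold_sketch(y_true, y_pred_pos, thresholds=None):
    # For each threshold, binarize the positive-class scores and compute example metrics;
    # the resulting rows can then be plotted as metric/threshold curves.
    if thresholds is None:
        thresholds = np.linspace(0.01, 0.99, 99)
    rows = []
    for threshold in thresholds:
        y_pred_class = (np.asarray(y_pred_pos) > threshold).astype(int)
        rows.append({'threshold': threshold,
                     'accuracy': accuracy_score(y_true, y_pred_class),
                     'f1_score': f1_score(y_true, y_pred_class)})
    return rows
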
Example #5
def save_codebook(self, weight_list, epoch, label):
    # Render each weight matrix in the codebook as an annotated heatmap on a shared figure
    # and log the figure to Neptune under the given label.
    figure, axes = plt.subplots(1, 3, sharey=True, figsize=(20, 5))
    figure.suptitle(f'Epoch {epoch}')
    for i, (matrix, ax) in enumerate(zip(weight_list, axes)):
        g = sns.heatmap(matrix, annot=True, fmt='.2f', ax=ax)
        g.set_title(f'{label} {i}')
    send_figure(figure, channel_name=label)
    plt.close()
Example #6
def log_precision_recall_auc(y_true,
                             y_pred,
                             experiment=None,
                             channel_name='metric_charts',
                             prefix=''):
    """Creates and logs Precision Recall curve and Average precision score to Neptune.

    Args:
        y_true (array-like, shape (n_samples)): Ground truth (correct) target values.
        y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values from 0 to 1.
        experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None.
        channel_name(str): name of the neptune channel. Default is 'metric_charts'.
        prefix(str): Prefix that will be added before metric name when logged to Neptune.

    Examples:
        Train the model and make predictions on test::

            from sklearn.datasets import make_classification
            from sklearn.ensemble import RandomForestClassifier
            from sklearn.model_selection import train_test_split
            from sklearn.metrics import classification_report

            X, y = make_classification(n_samples=2000)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

            model = RandomForestClassifier()
            model.fit(X_train, y_train)

            y_test_pred = model.predict_proba(X_test)

        Logs Precision Recall curve and Average precision score to Neptune::

            import neptune
            from neptunecontrib.monitoring.metrics import log_precision_recall_auc

            neptune.init()
            with neptune.create_experiment():
                log_precision_recall_auc(y_test, y_test_pred)

        Check out this experiment https://ui.neptune.ai/o/neptune-ai/org/binary-classification-metrics/e/BIN-101/logs.

    """
    assert len(
        y_pred.shape
    ) == 2, 'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it'

    _exp = experiment if experiment else neptune

    expect_not_a_run(_exp)

    avg_precision = sk_metrics.average_precision_score(y_true, y_pred[:, 1])
    _exp.log_metric(prefix + 'avg_precision', avg_precision)

    fig, ax = plt.subplots()
    plt_metrics.plot_precision_recall(y_true, y_pred, ax=ax)
    send_figure(fig, channel_name=prefix + channel_name, experiment=_exp)
    plt.close()
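
The assert above points to an `expand_prediction` helper for turning a 1D positive-class score vector into the expected (n_samples, 2) layout. A minimal sketch of what such a helper could look like (an illustration, not necessarily the library's implementation):

import numpy as np

def expand_prediction_sketch(y_pred_pos):
    # Stack the implied negative-class probability next to the positive-class scores.
    y_pred_pos = np.asarray(y_pred_pos)
    return np.stack([1 - y_pred_pos, y_pred_pos], axis=1)

With this, `log_precision_recall_auc(y_test, expand_prediction_sketch(y_test_pred[:, 1]))` would also accept a 1D score vector.
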
Example #7
def log_confusion_matrix(y_true,
                         y_pred_class,
                         experiment=None,
                         channel_name='metric_charts',
                         prefix=''):
    """Creates a confusion matrix figure and logs it in Neptune.

    Args:
        y_true (array-like, shape (n_samples)): Ground truth (correct) target values.
        y_pred_class (array-like, shape (n_samples)): Class predictions with values 0 or 1.
        experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None.
        channel_name(str): name of the neptune channel. Default is 'metric_charts'.
        prefix(str): Prefix that will be added before metric name when logged to Neptune.

    Examples:
        Train the model and make predictions on test::

            from sklearn.datasets import make_classification
            from sklearn.ensemble import RandomForestClassifier
            from sklearn.model_selection import train_test_split
            from sklearn.metrics import classification_report

            X, y = make_classification(n_samples=2000)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

            model = RandomForestClassifier()
            model.fit(X_train, y_train)

            y_test_pred = model.predict_proba(X_test)

        Log confusion matrix to Neptune::

            import neptune
            from neptunecontrib.monitoring.metrics import log_confusion_matrix

            neptune.init()
            with neptune.create_experiment():
                log_confusion_matrix(y_test, y_test_pred[:,1]>0.5)

        Check out this experiment https://ui.neptune.ai/o/neptune-ai/org/binary-classification-metrics/e/BIN-101/logs.

    """
    assert len(
        y_pred_class.shape
    ) == 1, 'y_pred_class needs to be 1D class prediction with values 0, 1'

    _exp = experiment if experiment else neptune

    expect_not_a_run(_exp)

    fig, ax = plt.subplots()
    _plot_confusion_matrix(y_true, y_pred_class, ax=ax)
    send_figure(fig, channel_name=prefix + channel_name, experiment=_exp)
    plt.close()
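
The helper `_plot_confusion_matrix` is not part of this listing. A minimal sketch of a comparable plot, assuming scikit-learn and seaborn (hypothetical function name):

import seaborn as sns
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix_sketch(y_true, y_pred_class, ax):
    # Draw the raw confusion-matrix counts as an annotated heatmap on the given axes.
    cmatrix = confusion_matrix(y_true, y_pred_class)
    sns.heatmap(cmatrix, annot=True, fmt='d', ax=ax)
    ax.set_xlabel('predicted label')
    ax.set_ylabel('true label')
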
Example #8
def log_prediction_distribution(y_true,
                                y_pred_pos,
                                experiment=None,
                                channel_name='metric_charts',
                                prefix=''):
    """Generates prediction distribution plot from predictions and true labels.

    Args:
        y_true (array-like, shape (n_samples)): Ground truth (correct) target values.
        y_pred_pos (array-like, shape (n_samples)): Score predictions with values from 0 to 1.
        experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None.
        channel_name(str): name of the neptune channel. Default is 'metric_charts'.
        prefix(str): Prefix that will be added before metric name when logged to Neptune.

    Examples:
        Train the model and make predictions on test::

            from sklearn.datasets import make_classification
            from sklearn.ensemble import RandomForestClassifier
            from sklearn.model_selection import train_test_split
            from sklearn.metrics import classification_report

            X, y = make_classification(n_samples=2000)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

            model = RandomForestClassifier()
            model.fit(X_train, y_train)

            y_test_pred = model.predict_proba(X_test)

        Plot prediction distribution::

            from neptunecontrib.monitoring.metrics import log_prediction_distribution

            log_prediction_distribution(y_test, y_test_pred[:, 1])
    """
    assert len(
        y_pred_pos.shape
    ) == 1, 'y_pred_pos needs to be 1D prediction for positive class'

    _exp = experiment if experiment else neptune

    expect_not_a_run(_exp)

    fig, ax = plt.subplots()
    _plot_prediction_distribution(y_true, y_pred_pos, ax=ax)
    send_figure(fig, channel_name=prefix + channel_name, experiment=_exp)
    plt.close()
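
`_plot_prediction_distribution` is likewise not shown here. As a rough sketch of such a plot (hypothetical name, assuming seaborn), one density per true class is drawn, so that well-separated humps indicate confident predictions:

import numpy as np
import seaborn as sns

def plot_prediction_distribution_sketch(y_true, y_pred_pos, ax):
    # Plot the score distribution separately for the negative and positive class.
    y_true = np.asarray(y_true)
    y_pred_pos = np.asarray(y_pred_pos)
    sns.kdeplot(y_pred_pos[y_true == 0], label='negative class', ax=ax)
    sns.kdeplot(y_pred_pos[y_true == 1], label='positive class', ax=ax)
    ax.set_xlabel('predicted probability of the positive class')
    ax.legend()
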
Example #9
def on_epoch_end(self, *args):
    # Sample a batch at the end of each epoch, compute the colour and shape gradients,
    # and log them to Neptune both as text values and as a heatmap figure.
    loader = DataLoader(self.dataset,
                        batch_size=500,
                        drop_last=False,
                        shuffle=True)
    input, _ = next(iter(loader))
    g1, g2 = self.get_gradients(input)
    neptune.send_text('color_grad', str(g1))
    neptune.send_text('shape_grad', str(g2))
    ax = sns.heatmap(torch.cat([g1, g2], dim=0),
                     xticklabels=['color', 'shape'],
                     yticklabels=['$m_1$', '$m_2$'])
    figure = ax.get_figure()
    send_figure(figure, channel_name=f'{self.label} grads')
    figure.savefig('fig.png')
    plt.close(figure)
Example #10
def visualize_embeddings(self):
    # Project the receiver's embedding weights to 2D with PCA, log the raw projection as an
    # artifact, and log an annotated scatter plot of the embeddings as a Neptune figure.
    embeddings = self.game.receiver.embedding.weight.detach().transpose(1, 0)
    pca = PCA(n_components=2)
    embeddings_projected = pca.fit_transform(embeddings)
    np.savetxt('embs.txt', embeddings_projected)
    neptune.send_artifact('embs.txt')
    ax = sns.scatterplot(x=embeddings_projected[:, 0],
                         y=embeddings_projected[:, 1])
    for i in range(10):
        ax.annotate(str(i), embeddings_projected[i], size=20)
    sns.despine(left=True, bottom=True)
    plt.xlabel('First principal component')
    plt.ylabel('Second principal component')
    figure = ax.get_figure()
    send_figure(figure, channel_name='embeddings')
    figure.savefig('figx.png')
    plt.close(figure)
Example #11
def visualize(self, dimensionality_reduction_transform):
    # Take the most recently recorded activation for each tracked key, project the stack to 2D
    # with the supplied transform, and log a colour/shape scatter plot of the result to Neptune.
    *_, last_activations = zip(*[
        activations
        for (key, activations) in sorted(self.activations.items())
    ])
    activations_projected = dimensionality_reduction_transform.fit_transform(
        np.vstack(last_activations))
    df = pd.DataFrame.from_dict({
        'color': (['blue'] * 5 + ['cyan'] * 5 + ['gray'] * 5 +
                  ['green'] * 5 + ['magenta'] * 5),
        'shape': ['box', 'sphere', 'cylinder', 'torus', 'ellipsoid'] * 5,
        'x': activations_projected[:, 0],
        'y': activations_projected[:, 1]
    })
    sns.set(style="whitegrid")
    ax = sns.scatterplot(x="x",
                         y="y",
                         hue="color",
                         style="shape",
                         data=df,
                         legend=False,
                         palette=dict(blue='blue',
                                      cyan='cyan',
                                      gray='gray',
                                      green='green',
                                      magenta='magenta'),
                         markers=('s', 'o', 'D', 'X', '^'),
                         s=70)
    sns.despine(left=True, bottom=True)
    plt.xlabel('First principal component')
    plt.ylabel('Second principal component')
    figure = ax.get_figure()
    send_figure(figure, channel_name=self.label)
    figure.savefig('fig.png')
    plt.close(figure)
Example #12
def log_fairness_classification_metrics(y_true, y_pred_class, y_pred_score, sensitive_attributes,
                                        favorable_label, unfavorable_label,
                                        privileged_groups, unprivileged_groups,
                                        experiment=None, prefix=''):
    """Creates fairness metric charts, calculates fairness classification metrics and logs them to Neptune.

    Class-based metrics that are logged: 'true_positive_rate_difference','false_positive_rate_difference',
    'false_omission_rate_difference', 'false_discovery_rate_difference', 'error_rate_difference',
    'false_positive_rate_ratio', 'false_negative_rate_ratio', 'false_omission_rate_ratio',
    'false_discovery_rate_ratio', 'error_rate_ratio', 'average_odds_difference', 'disparate_impact',
    'statistical_parity_difference', 'equal_opportunity_difference', 'theil_index',
    'between_group_theil_index', 'between_all_groups_theil_index', 'coefficient_of_variation',
    'between_group_coefficient_of_variation', 'between_all_groups_coefficient_of_variation',
    'generalized_entropy_index', 'between_group_generalized_entropy_index',
    'between_all_groups_generalized_entropy_index'

    Charts are logged to the 'metrics_by_group' channel: 'confusion matrix', 'TPR', 'TNR', 'FPR', 'FNR', 'PPV', 'NPV',
    'FDR', 'FOR', 'ACC', 'error_rate', 'selection_rate', 'power', 'precision', 'recall',
    'sensitivity', 'specificity'.

    Args:
        y_true (array-like, shape (n_samples)): Ground truth (correct) target values.
        y_pred_class (array-like, shape (n_samples)): Class predictions with values 0 or 1.
        y_pred_score (array-like, shape (n_samples)): Score predictions with values from 0 to 1. Default None.
        sensitive_attributes (pandas.DataFrame, shape (n_samples, k)): dataframe containing only sensitive columns.
        favorable_label (str or int): label that is favorable, brings positive value to a person being classified.
        unfavorable_label (str or int): label that is unfavorable, brings negative value to a person being classified.
        privileged_groups (dict): dictionary with column names and list of values for those columns that
           belong to the privileged groups.
        unprivileged_groups (dict): dictionary with column names and list of values for those columns that
           belong to the unprivileged groups.
        experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None.
        prefix(str): Prefix that will be added before metric name when logged to Neptune.

    Examples:
        Train the model and make predictions on test.
        Log metrics and performance curves to Neptune::

            import neptune
            from neptunecontrib.monitoring.fairness import log_fairness_classification_metrics

            neptune.init()
            with neptune.create_experiment():
                log_fairness_classification_metrics(y_true, y_pred_class, y_pred_score, test[['race']],
                                                    favorable_label='granted_parole',
                                                    unfavorable_label='not_granted_parole',
                                                    privileged_groups={'race':['Caucasian']},
                                                    unprivileged_groups={'race':['African-American','Hispanic']},
                                                    )

        Check out this experiment https://ui.neptune.ai/jakub-czakon/model-fairness/e/MOD-92/logs.

    """
    _exp = experiment if experiment else neptune
    expect_not_a_run(_exp)

    bias_info = {'favorable_label': favorable_label,
                 'unfavorable_label': unfavorable_label,
                 'protected_columns': sensitive_attributes.columns.tolist()}

    privileged_info = _fmt_priveleged_info(privileged_groups, unprivileged_groups)

    ground_truth_test = _make_dataset(sensitive_attributes, y_true, **bias_info, **privileged_info)
    prediction_test = _make_dataset(sensitive_attributes, y_pred_class, y_pred_score, **bias_info, **privileged_info)

    clf_metric = ClassificationMetric(ground_truth_test, prediction_test, **privileged_info)

    _log_fairness_metrics(clf_metric, _exp, prefix)

    fig = _plot_confusion_matrix_by_group(clf_metric, figsize=(12, 4))
    plt.tight_layout()
    send_figure(fig, channel_name=prefix + 'metrics_by_group', experiment=_exp)
    plt.close()

    group_metrics = ['TPR', 'TNR', 'FPR', 'FNR', 'PPV', 'NPV', 'FDR', 'FOR',
                     'ACC', 'error_rate', 'selection_rate', 'power',
                     'precision', 'recall', 'sensitivity', 'specificity']

    for metric_name in group_metrics:
        fig, ax = plt.subplots(figsize=(12, 8))
        _plot_performance_by_group(clf_metric, metric_name, ax)
        send_figure(fig, experiment=_exp, channel_name=prefix + 'metrics_by_group')
        plt.close()
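
`_log_fairness_metrics` is not reproduced in this listing. As an illustration of the pattern, a minimal sketch (hypothetical name) that pulls a few of the class-based metrics listed in the docstring from the aif360 `ClassificationMetric` object and logs them:

def log_fairness_metrics_sketch(clf_metric, experiment, prefix=''):
    # Each entry maps a Neptune metric name to the corresponding aif360 metric method.
    metrics = {
        'disparate_impact': clf_metric.disparate_impact,
        'statistical_parity_difference': clf_metric.statistical_parity_difference,
        'equal_opportunity_difference': clf_metric.equal_opportunity_difference,
        'average_odds_difference': clf_metric.average_odds_difference,
        'theil_index': clf_metric.theil_index,
    }
    for name, metric_fn in metrics.items():
        experiment.log_metric(prefix + name, metric_fn())
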