alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx

        mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)

        x_val_p = mdl.predict(transformer.transform_batch(
            x_val_task, [t_ind] * len(x_val_task)),
                              batch_size=1024)
        val_scores_in += dirichlet_normality_score(mle_alpha_t, x_val_p)

    val_scores_in /= transformer.n_transforms

    labels = y_test.flatten() == single_class_ind

    plot_histogram_disc_loss_acc_thr(
        test_scores[labels],
        test_scores[~labels],
        path='../results',
        x_label_name='TransTransformations_Dscores_hits',
        val_inliers_score=val_scores_in)

    # # Dirichlet transforms with arcsinh
    # neg_scores = -test_scores
    # norm_scores = neg_scores - np.min(neg_scores)
    # norm_scores = norm_scores / np.max(norm_scores)
    # arcsinh_scores = np.arcsinh(norm_scores * 10000)
    # inlier_arcsinh_score = arcsinh_scores[labels]
    # outlier_arcsinh_score = arcsinh_scores[~labels]
    # plot_histogram_disc_loss_acc_thr(inlier_arcsinh_score, outlier_arcsinh_score,
    #                                  '../results',
    #                                  'TransTransformations_arcsinh*10000_Dscores_hits')

    # Transforms without dirichlet
        alpha_sum_approx = calc_approx_alpha_sum(observed_dirichlet)
        alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx

        mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)

        x_test_p = mdl.predict(transformer.transform_batch(
            x_test, [t_ind] * len(x_test)),
                               batch_size=1024)
        scores += dirichlet_normality_score(mle_alpha_t, x_test_p)

    scores /= transformer.n_transforms
    labels = y_test.flatten() == single_class_ind

    plot_histogram_disc_loss_acc_thr(
        scores[labels],
        scores[~labels],
        path='../results',
        x_label_name='Transformations_Dscores_fashion')

    # Dirichlet transforms with arcsinh
    neg_scores = -scores
    norm_scores = neg_scores - np.min(neg_scores)
    norm_scores = norm_scores / np.max(norm_scores)
    arcsinh_scores = np.arcsinh(norm_scores * 10000)
    inlier_arcsinh_score = arcsinh_scores[labels]
    outlier_arcsinh_score = arcsinh_scores[~labels]
    plot_histogram_disc_loss_acc_thr(
        inlier_arcsinh_score, outlier_arcsinh_score, '../results',
        'Transformations_arcsinh*10000_Dscores_fashion')

    # Transforms without dirichlet
    plt.title('scores_inliers')
    plt.show()
    plt.hist(scores_neg, bins=bins)
    plt.title('scores_outliers')
    plt.show()

    truth = np.concatenate(
        (np.zeros_like(scores_neg), np.ones_like(scores_pos)))
    preds = np.concatenate((scores_neg, scores_pos))
    fpr, tpr, roc_thresholds = roc_curve(truth, preds)
    roc_auc = auc(fpr, tpr)
    print(roc_auc)

    plot_histogram_disc_loss_acc_thr(plain_scores_test[labels],
                                     plain_scores_test[~labels],
                                     x_label_name='%s_scores_hits' %
                                     EXPERIMENT_NAME,
                                     path='../results',
                                     val_inliers_score=plain_scores_val)

    ## matrices
    # transform test
    transformations_inds_test = np.tile(np.arange(transformer.n_transforms),
                                        len(x_test))
    start_time = time.time()
    x_test_transformed = transformer.transform_batch(
        np.repeat(x_test, transformer.n_transforms, axis=0),
        transformations_inds_test)
    time_usage = str(
        datetime.timedelta(seconds=int(round(time.time() - start_time))))
    print("Time to perform transforms: " + time_usage)
        alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx

        mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)

        x_val_p = mdl.predict(transformer.transform_batch(
            x_val_task, [t_ind] * len(x_val_task)),
                              batch_size=1024)
        val_scores_in += dirichlet_normality_score(mle_alpha_t, x_val_p)

    val_scores_in /= transformer.n_transforms

    labels = y_test.flatten() == single_class_ind

    plot_histogram_disc_loss_acc_thr(
        test_scores[labels],
        test_scores[~labels],
        path='../results',
        x_label_name='KernelOnlyTransformations_Dscores_hits',
        val_inliers_score=val_scores_in)

    # Transforms without dirichlet
    plain_scores_test = np.zeros((len(x_test), ))
    for t_ind in tqdm(range(transformer.n_transforms)):
        # predictions for a single transformation
        x_test_p = mdl.predict(transformer.transform_batch(
            x_test, [t_ind] * len(x_test)),
                               batch_size=1024)
        plain_scores_test += x_test_p[:, t_ind]

    plain_scores_test /= transformer.n_transforms
    # val
    plain_scores_val = np.zeros((len(x_val_task), ))
    plt.title('scores_inliers')
    plt.show()
    plt.hist(scores_neg, bins=bins)
    plt.title('scores_outliers')
    plt.show()

    truth = np.concatenate(
        (np.zeros_like(scores_neg), np.ones_like(scores_pos)))
    preds = np.concatenate((scores_neg, scores_pos))
    fpr, tpr, roc_thresholds = roc_curve(truth, preds)
    roc_auc = auc(fpr, tpr)
    print(roc_auc)

    plot_histogram_disc_loss_acc_thr(
        plain_scores_test[labels],
        plain_scores_test[~labels],
        x_label_name='EnsembleTransformations_scores_hits',
        path='../results',
        val_inliers_score=plain_scores_val)

    # # Scores arcsinh
    # plain_neg_scores = 1 - plain_scores_test
    # plain_norm_scores = plain_neg_scores - np.min(plain_neg_scores)
    # plain_norm_scores = plain_norm_scores / plain_norm_scores.max()
    # plain_arcsinh_scores = np.arcsinh(plain_norm_scores * 1000000)
    #
    # plot_histogram_disc_loss_acc_thr(plain_arcsinh_scores[labels],
    #                                  plain_arcsinh_scores[~labels],
    #                                  path='../results',
    #                                  x_label_name='EnsembleTransformations_arcsinh*10000_scores_hits')

    ## matrices
    alpha_sum_approx = calc_approx_alpha_sum(observed_dirichlet)
    alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx

    mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)

    x_val_p = mdl.predict(
        transformer.transform_batch(x_val_task, [t_ind] * len(x_val_task)),
        batch_size=1024)
    val_scores_in += dirichlet_normality_score(mle_alpha_t, x_val_p)

  val_scores_in /= transformer.n_transforms

  labels = y_test.flatten() == single_class_ind

  plot_histogram_disc_loss_acc_thr(test_scores[labels], test_scores[~labels],
                                   path=results_folder,
                                   x_label_name='Dscores',
                                   val_inliers_score=val_scores_in)

  # Transforms without dirichlet
  plain_scores_test = np.zeros((len(x_test),))
  for t_ind in tqdm(range(transformer.n_transforms)):
    # predictions for a single transformation
    x_test_p = mdl.predict(
        transformer.transform_batch(x_test, [t_ind] * len(x_test)),
        batch_size=1024)
    plain_scores_test += x_test_p[:, t_ind]

  plain_scores_test /= transformer.n_transforms
  #val
  plain_scores_val = np.zeros((len(x_val_task),))
  for t_ind in tqdm(range(transformer.n_transforms)):
        alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx

        mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)

        x_val_p = mdl.predict(transformer.transform_batch(
            x_val_task, [t_ind] * len(x_val_task)),
                              batch_size=1024)
        val_scores_in += dirichlet_normality_score(mle_alpha_t, x_val_p)

    val_scores_in /= transformer.n_transforms

    labels = y_test.flatten() == single_class_ind

    plot_histogram_disc_loss_acc_thr(test_scores[labels],
                                     test_scores[~labels],
                                     path=results_folder,
                                     x_label_name='Dscores',
                                     val_inliers_score=val_scores_in)

    # Transforms without dirichlet
    plain_scores_test = np.zeros((len(x_test), ))
    for t_ind in tqdm(range(transformer.n_transforms)):
        # predictions for a single transformation
        x_test_p = mdl.predict(transformer.transform_batch(
            x_test, [t_ind] * len(x_test)),
                               batch_size=1024)
        plain_scores_test += x_test_p[:, t_ind]

    plain_scores_test /= transformer.n_transforms
    # val
    plain_scores_val = np.zeros((len(x_val_task), ))
Example #8
0
import time
import datetime
from scripts.detached_transformer_od_hits import \
  plot_histogram_disc_loss_acc_thr
import matplotlib.pyplot as plt

# Re-plot a stored score histogram and the accuracy-vs-threshold curve from a
# previously saved results archive.
# NOTE(review): `os`, `np` and `PROJECT_PATH` are not imported/defined in the
# visible part of this file -- confirm they are provided earlier.
data_path = os.path.join(
    PROJECT_PATH, 'results', 'Transformer_OD_Model',
    'ztf-real-bog-v1_Transformer-OD-Model-dirichlet_real_2019-11-22-0031.npz')

# np.load on an .npz archive returns a lazy NpzFile that keeps the underlying
# file open; dict(...) materialises every array, after which the context
# manager can safely close the archive instead of leaking the handle.
with np.load(data_path) as data:
    data_dict = dict(data)

scores = data_dict['scores']
labels = data_dict['labels']
scores_val = data_dict['scores_val']

# Entries labelled 1 are treated as inliers, everything else as outliers.
inlier_mask = labels == 1
inliers_scores = scores[inlier_mask]
outliers_scores = scores[~inlier_mask]

accuracies = data_dict['accuracies']
roc_thresholds = data_dict['roc_thresholds']

# path=None is forwarded unchanged to the plotting helper.
plot_histogram_disc_loss_acc_thr(inliers_scores,
                                 outliers_scores,
                                 path=None,
                                 x_label_name='Dscores',
                                 val_inliers_score=scores_val)

# Accuracy as a function of the ROC threshold, with the x-axis clipped to the
# range of scores actually observed.
plt.plot(roc_thresholds, accuracies)
plt.xlim((scores.min(), scores.max()))
plt.show()