matrix_scores_val /= transformer.n_transforms
    labels = y_test.flatten() == single_class_ind

    # plot_matrix_score(x_test, matrix_scores, labels, plot_inliers=True,
    #                   n_to_plot=5)
    # plot_matrix_score(x_test, matrix_scores, labels, plot_inliers=False,
    #                   n_to_plot=5)

    plot_histogram_disc_loss_acc_thr(
        np.trace(matrix_scores_test[labels], axis1=1, axis2=2),
        np.trace(matrix_scores_test[~labels], axis1=1, axis2=2),
        path='../results',
        x_label_name='TransTransformations_matrixTrace_hits',
        val_inliers_score=np.trace(matrix_scores_val))

    entropy_scores_test = get_entropy(matrix_scores_test)
    entropy_scores_val = get_entropy(matrix_scores_val)
    plot_histogram_disc_loss_acc_thr(
        entropy_scores_test[labels],
        entropy_scores_test[~labels],
        path='../results',
        x_label_name='TransTransformations_entropy_scores_hits',
        val_inliers_score=entropy_scores_val)

    ## Get logits for xentropy
    # Get matrix scores
    matrix_scores_raw_test = np.zeros(
        (len(x_test), transformer.n_transforms, transformer.n_transforms))
    for t_ind in tqdm(range(transformer.n_transforms)):
        test_specific_transform_indxs = np.where(
            transformations_inds_test == t_ind)
Пример #2
0
def test_model_original(transformer,
                        loader,
                        dataset_name='hits-4-c',
                        single_class_ind=1):
    """Train a new wide residual network on one class and score the test set.

    The data returned by ``loader(return_val=True)`` is pickled under
    ``tests/aux_data``. A network is then built with
    ``create_wide_residual_network`` and trained to predict which
    transformation index was applied to samples whose label equals
    ``single_class_ind``. Four score vectors are computed for ``x_test``,
    each one is written with ``save_results_file``, and the model weights
    are saved under ``tests/aux_results/<dataset_name>``.

    Args:
        transformer: object exposing ``n_transforms`` and
            ``transform_batch(x, transformation_indices)``.
        loader: callable; ``loader(return_val=True)`` must return
            ``(x_train, y_train), (x_val, y_val), (x_test, y_test)``.
        dataset_name: name used in output file names and result records.
        single_class_ind: label value treated as the inlier class.

    Returns:
        A 4-tuple of ``get_roc_auc`` values, in order, for the Dirichlet
        score, the trace score, the negated entropy score and the negated
        ``get_xH`` score.
    """
    tests_root = os.path.join(PROJECT_PATH, 'tests')
    results_dir = os.path.join(tests_root, 'aux_results')
    save_dir = os.path.join(tests_root, 'aux_data')
    for directory in (results_dir, save_dir,
                      os.path.join(results_dir, dataset_name)):
        utils.check_path(directory)

    # Load the three splits and persist them exactly as loaded.
    train_split, val_split, test_split = loader(return_val=True)
    x_train, y_train = train_split
    x_val, y_val = val_split
    x_test, y_test = test_split
    normal_data = (x_train, y_train), (x_val, y_val), (x_test, y_test)
    normal_data_path = os.path.join(
        save_dir, 'normal_data_%s_tf1_original.pkl' % dataset_name)
    utils.save_pickle(normal_data, normal_data_path)

    # Build the classifier: one output per transformation.
    wrn_n, wrn_k = 10, 4
    input_shape = x_train.shape[1:]
    n_transforms = transformer.n_transforms
    mdl = create_wide_residual_network(input_shape, n_transforms, wrn_n,
                                       wrn_k)
    mdl.compile('adam', 'categorical_crossentropy', ['acc'])

    # Keep only training samples of the inlier class.
    inlier_mask = y_train.flatten() == single_class_ind
    x_train_task = x_train[inlier_mask]
    print(x_train_task.shape)

    # Each inlier is repeated n_transforms times; the tiled index vector
    # assigns transformation 0..n_transforms-1 to the consecutive copies.
    transformations_inds = np.tile(np.arange(n_transforms),
                                   len(x_train_task))
    repeated_inliers = np.repeat(x_train_task, n_transforms, axis=0)
    x_train_task_transformed = transformer.transform_batch(
        repeated_inliers, transformations_inds)
    print(x_train_task_transformed.shape)

    # Train the transformation classifier.
    mdl.fit(x=x_train_task_transformed,
            y=to_categorical(transformations_inds),
            batch_size=128,
            epochs=int(np.ceil(200 / n_transforms)))

    n_test = len(x_test)
    scores = np.zeros((n_test, ))
    matrix_evals = np.zeros((n_test, n_transforms, n_transforms))
    for t_ind in range(n_transforms):
        # Predictions on the inliers under transformation t_ind are used to
        # fit the alpha vector passed to dirichlet_normality_score.
        train_transformed = transformer.transform_batch(
            x_train_task, [t_ind] * len(x_train_task))
        observed_dirichlet = mdl.predict(train_transformed, batch_size=1024)
        log_p_hat_train = np.log(observed_dirichlet).mean(axis=0)

        alpha_sum_approx = calc_approx_alpha_sum(observed_dirichlet)
        alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx
        mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)

        # Predictions on the test set under the same transformation.
        test_transformed = transformer.transform_batch(
            x_test, [t_ind] * n_test)
        x_test_p = mdl.predict(test_transformed, batch_size=1024)
        matrix_evals[:, :, t_ind] += x_test_p
        scores += dirichlet_normality_score(mle_alpha_t, x_test_p)

    scores /= n_transforms
    matrix_evals /= n_transforms
    scores_simple = np.trace(matrix_evals, axis1=1, axis2=2)
    scores_entropy = -1 * get_entropy(matrix_evals)
    scores_xH = -1 * get_xH(transformer, matrix_evals)
    labels = y_test.flatten() == single_class_ind

    # (experiment name, score vector) pairs, saved and evaluated in order.
    scored_experiments = (
        ('transformations', scores),
        ('transformations-simple', scores_simple),
        ('transformations-entropy', scores_entropy),
        ('transformations-xH', scores_xH),
    )
    for experiment_name, experiment_scores in scored_experiments:
        save_results_file(results_dir,
                          dataset_name,
                          single_class_ind,
                          scores=experiment_scores,
                          labels=labels,
                          experiment_name=experiment_name)

    class_name = get_class_name_from_index(single_class_ind, dataset_name)
    mdl_weights_name = '{}_tf1_original_{}_weights.h5'.format(
        dataset_name, class_name)
    mdl.save_weights(
        os.path.join(results_dir, dataset_name, mdl_weights_name))
    # The string below is a no-op expression kept as a log of past timings
    # and returned AUC tuples.
    """
  Time test_model_original(transformer, load_hits4c, dataset_name='hits-4-c') 00:06:58.37
  (0.9917134999999999, 0.9350055, 0.9872614999999999, 0.94142025)
  (0.9938067500000001, 0.9923547500000001, 0.9931685, 0.992637375)
  (0.9912172499999999, 0.9883357499999998, 0.9909070000000001, 0.9886706249999999)
  #train only Time test_model_original(transformer, load_hits4c, dataset_name='hits-4-c', tf_version='tf1') 00:03:48.29
  """
    return tuple(
        get_roc_auc(experiment_scores, labels)
        for _, experiment_scores in scored_experiments)
Пример #3
0
def test_model_loading(transformer,
                       mdl,
                       loader,
                       dataset_name='hits-4-c',
                       single_class_ind=1,
                       tf_version='tf1',
                       transformer_name='transformed',
                       model_name='resnet',
                       epochs=None):
    """Train a supplied model on cached transformed data and score the test set.

    Raw splits and their transformed versions are read from pickles under
    ``tests/aux_data`` when those files exist; otherwise they are produced
    with ``loader`` / ``transformer`` and written there. ``mdl`` is compiled,
    fitted on the transformed inlier training data, and used to compute four
    score vectors for the test set. Each vector is written with
    ``save_results_file``, the weights are saved under
    ``tests/aux_results/<dataset_name>``, and ``reset_weights()`` is called.

    Args:
        transformer: object exposing ``n_transforms`` and
            ``transform_batch(x, transformation_indices)``.
        mdl: model object providing ``compile``, ``fit``, ``predict`` and
            ``save_weights``.
        loader: callable; ``loader(return_val=True)`` must return
            ``(x_train, y_train), (x_val, y_val), (x_test, y_test)``.
        dataset_name: name used in file names and result records.
        single_class_ind: label value treated as the inlier class.
        tf_version: tag embedded in cache, result and weight file names.
        transformer_name: tag embedded in cache, result and weight names.
        model_name: tag embedded in result and weight names.
        epochs: number of training epochs; when ``None`` it defaults to
            ``ceil(200 / transformer.n_transforms)``.

    Returns:
        A 4-tuple of ``get_roc_auc`` values, in order, for the Dirichlet
        score, the trace score, the negated entropy score and the negated
        ``get_xH`` score.
    """
    tests_root = os.path.join(PROJECT_PATH, 'tests')
    results_dir = os.path.join(tests_root, 'aux_results')
    save_dir = os.path.join(tests_root, 'aux_data')
    for directory in (results_dir, save_dir,
                      os.path.join(results_dir, dataset_name)):
        utils.check_path(directory)

    # Raw data: reuse the pickle when present, otherwise load and store it.
    normal_data_path = os.path.join(
        save_dir, 'normal_data_%s_%s_loading.pkl' % (dataset_name, tf_version))
    if not os.path.exists(normal_data_path):
        (x_train, y_train), (x_val, y_val), (x_test, y_test) = loader(
            return_val=True)
        utils.save_pickle(
            ((x_train, y_train), (x_val, y_val), (x_test, y_test)),
            normal_data_path)
    else:
        (x_train, y_train), (x_val, y_val), (x_test, y_test) = \
            pd.read_pickle(normal_data_path)

    # The model is supplied by the caller; only compile it here.
    mdl.compile('adam', 'categorical_crossentropy', ['acc'])

    # Keep only inliers in the train and validation splits.
    x_train = x_train[y_train.flatten() == single_class_ind]
    x_val = x_val[y_val.flatten() == single_class_ind]

    # NOTE(review): this cache file name does not include single_class_ind,
    # although the cached train/val arrays are filtered by it above. A cache
    # written for one class would be reused for another - confirm callers
    # never vary single_class_ind against the same cache directory.
    transformed_data_path = os.path.join(
        save_dir, '%s_data_%s_%s_loading.pkl' %
        (transformer_name, dataset_name, tf_version))
    if not os.path.exists(transformed_data_path):

        def transform_every_sample(samples):
            # Repeat each sample n_transforms times and pair the copies
            # with transformation indices 0..n_transforms-1.
            inds = np.tile(np.arange(transformer.n_transforms), len(samples))
            transformed = transformer.transform_batch(
                np.repeat(samples, transformer.n_transforms, axis=0), inds)
            return transformed, inds

        x_train_tr, y_train_tr = transform_every_sample(x_train)
        x_val_tr, y_val_tr = transform_every_sample(x_val)
        x_test_tr, y_test_tr = transform_every_sample(x_test)
        utils.save_pickle(((x_train_tr, y_train_tr), (x_val_tr, y_val_tr),
                           (x_test_tr, y_test_tr)), transformed_data_path)
    else:
        ((x_train_tr, y_train_tr), (x_val_tr, y_val_tr),
         (x_test_tr, y_test_tr)) = pd.read_pickle(transformed_data_path)

    for array in (x_train, x_train_tr, x_test, x_test_tr):
        print(array.shape)

    # Train the transformation classifier.
    if epochs is None:
        epochs = int(np.ceil(200 / transformer.n_transforms))
    mdl.fit(x=x_train_tr,
            y=to_categorical(y_train_tr),
            batch_size=128,
            epochs=epochs)

    n_transforms = transformer.n_transforms
    n_test = len(x_test)
    scores = np.zeros((n_test, ))
    matrix_evals = np.zeros((n_test, n_transforms, n_transforms))

    # Predict once on everything, then slice per transformation index.
    x_pred_train = mdl.predict(x_train_tr, batch_size=1024)
    x_pred_test = mdl.predict(x_test_tr, batch_size=1024)
    print(x_pred_train.shape)
    print(x_pred_test.shape)
    for t_ind in range(n_transforms):
        train_rows = np.where(y_train_tr == t_ind)[0]
        observed_dirichlet = x_pred_train[train_rows]
        log_p_hat_train = np.log(observed_dirichlet).mean(axis=0)

        alpha_sum_approx = calc_approx_alpha_sum(observed_dirichlet)
        alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx
        mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)

        test_rows = np.where(y_test_tr == t_ind)[0]
        x_test_p = x_pred_test[test_rows]
        matrix_evals[:, :, t_ind] += x_test_p
        scores += dirichlet_normality_score(mle_alpha_t, x_test_p)

    scores /= n_transforms
    matrix_evals /= n_transforms
    scores_simple = np.trace(matrix_evals, axis1=1, axis2=2)
    scores_entropy = -1 * get_entropy(matrix_evals)
    scores_xH = -1 * get_xH(transformer, matrix_evals)
    labels = y_test.flatten() == single_class_ind

    # (experiment-name template, score vector) pairs, handled in order.
    name_parts = (model_name, transformer_name, tf_version)
    scored_experiments = (
        ('%s-%s-loading-%s', scores),
        ('%s-%s-simple-loading-%s', scores_simple),
        ('%s-%s-entropy-loading-%s', scores_entropy),
        ('%s-%s-xH-loading-%s', scores_xH),
    )
    for name_template, experiment_scores in scored_experiments:
        save_results_file(results_dir,
                          dataset_name,
                          single_class_ind,
                          scores=experiment_scores,
                          labels=labels,
                          experiment_name=name_template % name_parts)

    mdl_weights_name = '{}_{}_{}_{}_loading_{}_weights.h5'.format(
        model_name, transformer_name, dataset_name, tf_version,
        get_class_name_from_index(single_class_ind, dataset_name))
    mdl.save_weights(
        os.path.join(results_dir, dataset_name, mdl_weights_name))
    reset_weights()
    # The string below is a no-op expression kept as a log of past timings
    # and returned AUC tuples.
    """
  Time test_model_original(transformer, load_hits4c, dataset_name='hits-4-c', tf_version='tf1') 00:04:31.65
  (0.992217, 0.9895665, 0.99131725, 0.989478125)
  (0.99240075, 0.9900822499999999, 0.99215325, 0.9901300000000001)
  """
    return tuple(
        get_roc_auc(experiment_scores, labels)
        for _, experiment_scores in scored_experiments)
Пример #4
0
def _kernal_plus_transformations_experiment(dataset_load_fn, dataset_name,
                                            single_class_ind, gpu_q):
    """Run one kernel-plus-transformations experiment for a single class.

    A ``PlusKernelTransformer`` is built and a wide residual network is
    trained to predict which transformation index was applied to training
    samples whose label equals ``single_class_ind``. Four score vectors are
    then computed for the test set, each is written with
    ``save_results_file``, and the model weights are saved under
    ``RESULTS_DIR/<dataset_name>`` with a timestamped file name.

    Args:
        dataset_load_fn: callable returning
            ``(x_train, y_train), (x_test, y_test)``.
        dataset_name: name used in result records and the weights file.
        single_class_ind: label value treated as the inlier class.
        gpu_q: accepted for interface compatibility; not used, because the
            GPU selection that would read from it is disabled.

    Raises:
        ValueError: if ``dataset_name`` is ``'cats-vs-dogs'``, for which no
            transformer or network configuration is defined here.
    """
    # Fail fast: this dataset previously left the transformer as None and
    # the network sizes unbound, so the run crashed with an unrelated
    # AttributeError only after the data had been loaded.
    if dataset_name in ['cats-vs-dogs']:
        raise ValueError(
            'No transformer configuration is defined for dataset {!r}'.format(
                dataset_name))

    (x_train, y_train), (x_test, y_test) = dataset_load_fn()

    transformer = PlusKernelTransformer(translation_x=8,
                                        translation_y=8,
                                        rotations=1,
                                        flips=1,
                                        gauss=1,
                                        log=1)
    n, k = (10, 4)
    mdl = create_wide_residual_network(x_train.shape[1:],
                                       transformer.n_transforms, n, k)
    mdl.compile('adam', 'categorical_crossentropy', ['acc'])

    # get inliers of specific class
    x_train_task = x_train[y_train.flatten() == single_class_ind]
    # [0_i, ..., (N_transforms-1)_i, ..., ..., 0_N_samples, ...,
    # (N_transforms-1)_N_samples] shape: (N_transforms*N_samples,)
    transformations_inds = np.tile(np.arange(transformer.n_transforms),
                                   len(x_train_task))
    x_train_task_transformed = transformer.transform_batch(
        np.repeat(x_train_task, transformer.n_transforms, axis=0),
        transformations_inds)
    batch_size = 128

    # NOTE(review): epochs is hard-coded to 2; the original line carried the
    # commented-out alternative int(np.ceil(200/transformer.n_transforms)).
    mdl.fit(x=x_train_task_transformed,
            y=to_categorical(transformations_inds),
            batch_size=batch_size,
            epochs=2)

    scores = np.zeros((len(x_test), ))
    matrix_evals = np.zeros(
        (len(x_test), transformer.n_transforms, transformer.n_transforms))
    observed_data = x_train_task
    for t_ind in range(transformer.n_transforms):
        # Predictions on the inliers under transformation t_ind are used to
        # fit the alpha vector passed to dirichlet_normality_score.
        observed_dirichlet = mdl.predict(transformer.transform_batch(
            observed_data, [t_ind] * len(observed_data)),
                                         batch_size=1024)
        log_p_hat_train = np.log(observed_dirichlet).mean(axis=0)

        alpha_sum_approx = calc_approx_alpha_sum(observed_dirichlet)
        alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx

        mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)

        # Predictions on the test set under the same transformation.
        x_test_p = mdl.predict(transformer.transform_batch(
            x_test, [t_ind] * len(x_test)),
                               batch_size=1024)
        matrix_evals[:, :, t_ind] += x_test_p
        scores += dirichlet_normality_score(mle_alpha_t, x_test_p)

    scores /= transformer.n_transforms
    matrix_evals /= transformer.n_transforms
    scores_simple = np.trace(matrix_evals, axis1=1, axis2=2)
    scores_entropy = -1 * get_entropy(matrix_evals)
    scores_xH = -1 * get_xH(transformer, matrix_evals)
    labels = y_test.flatten() == single_class_ind

    # Save every score vector under its own experiment name, in order.
    scored_experiments = (
        ('kernel-plus-transformations', scores),
        ('kernel-plus-transformations-simple', scores_simple),
        ('kernel-plus-transformations-entropy', scores_entropy),
        ('kernel-plus-transformations-xH', scores_xH),
    )
    for experiment_name, experiment_scores in scored_experiments:
        save_results_file(dataset_name,
                          single_class_ind,
                          scores=experiment_scores,
                          labels=labels,
                          experiment_name=experiment_name)

    mdl_weights_name = '{}_kernel-plus-transformations_{}_{}_weights.h5'.format(
        dataset_name, get_class_name_from_index(single_class_ind,
                                                dataset_name),
        datetime.datetime.now().strftime('%Y-%m-%d-%H%M'))
    mdl_weights_path = os.path.join(RESULTS_DIR, dataset_name,
                                    mdl_weights_name)
    mdl.save_weights(mdl_weights_path)