Example #1
def figure_4_intrinsic_validation(if_dict,
                                  vocab_dict_path,
                                  input_model_path,
                                  max_length,
                                  random=False,
                                  remove_ratio=0.5):
    """
    TODO: this one is designed only for the interpretation feature ground truth. We should also design one for each interpretation method.
    """
    with open(if_dict, 'rb') as f:
        if_dict = pkl.load(f)

    input_model = models.load_model(input_model_path)

    x_train, y_train, l_train, _, _, _, vocab_dict = train_test_split(
        settings.data_source,
        settings.test_split,
        settings.sequence_length,
        vocabulary=vocab_dict_path)

    batches = batch_iter(x_train,
                         y_train,
                         l_train,
                         settings.batch_size,
                         num_epochs=1)

    avg_drop = 0

    for batch in batches:
        x_data, y_data, l_data = batch

        predictions = input_model.predict(x_data)

        # Only vary random and remove_ratio; keep most_relevant unchanged
        x_perturb = perturb_input(x_data,
                                  y_data,
                                  l_data,
                                  if_dict,
                                  vocab_dict,
                                  max_length,
                                  random=random,
                                  most_relevant=True,
                                  remove_ratio=remove_ratio)

        predictions_perturb = input_model.predict(x_perturb)

        for doc_idx in range(len(x_data)):
            label = np.argmax(predictions[doc_idx])
            # relative confidence drop for the predicted class, clipped at zero
            drop = predictions[doc_idx][label] - predictions_perturb[doc_idx][label]
            avg_drop += max(0, drop) / predictions[doc_idx][label]

    return avg_drop / len(x_train)
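
A minimal driver sketch for producing the figure: the average confidence drop is measured once with the interpretation-feature ground truth and once with a random baseline, over several removal ratios. The model path and the ratio grid are placeholder assumptions; settings.post_processed_IF, settings.vocabulary_dict, and settings.sequence_length come from the other examples below.

# Hypothetical driver; the model path and ratio grid are assumptions, not part of the original code.
model_path = 'runs/cnn_model.h5'  # assumed location of the trained Keras model
for ratio in (0.1, 0.3, 0.5, 0.7, 0.9):
    guided_drop = figure_4_intrinsic_validation(settings.post_processed_IF,
                                                settings.vocabulary_dict,
                                                model_path,
                                                settings.sequence_length,
                                                random=False,
                                                remove_ratio=ratio)
    random_drop = figure_4_intrinsic_validation(settings.post_processed_IF,
                                                settings.vocabulary_dict,
                                                model_path,
                                                settings.sequence_length,
                                                random=True,
                                                remove_ratio=ratio)
    print(f'remove_ratio={ratio}: guided drop={guided_drop:.4f}, random drop={random_drop:.4f}')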
Example #2
import time
import pickle as pkl

import numpy as np

import src.settings as settings
from src.w2v import train_word2vec
from src.cnn_clf import cnn_classifier
from src.data_helpers import train_test_split
from src.plot import plot_loss_accuracy

np.random.seed(0)


# Loading Dataset
# ----------------------------------------------------------------------------------------------------------------------
print("Loading data...")
start = time.time()

x_train, y_train, l_train, x_test, y_test, l_test, vocabulary = train_test_split(settings.data_source,
                                                                                 settings.test_split,
                                                                                 settings.sequence_length,
                                                                                 max_num_words=settings.max_words)

# Save vocabulary processor
with open(settings.vocabulary_dict, 'wb') as f:
    pkl.dump(vocabulary, f, pkl.HIGHEST_PROTOCOL)


assert len(x_train) == len(y_train) == len(l_train)
assert len(x_test) == len(y_test) == len(l_test)
print('Dataset has been built successfully.')
print(f'Run time: {round(time.time() - start)} seconds')
print(f'Number of training samples: {len(x_train)}')
print(f'Number of testing samples: {len(x_test)}')
print("Vocabulary Size: {:d}".format(len(vocabulary)))
print("Model type is", settings.model_type)
Example #3
def generate_if_dict(vocab_dict, input_model_path, window_size,
                     target_layer_name, max_length):
    """
    This function generate interpretation features using the dataset that has been trained on the same dataset that we
    need to generate the interpretation features from.
    IMPORTANT: This function need to run on the Amazon_Yelp dataset on the server.
    """
    tf.compat.v1.disable_eager_execution()

    # the summation of the relevance scores of each interpretation feature in each document
    if_dict = dict()
    # the count of each interpretation feature
    if_count = dict()
    # average relevance score for each interpretation feature
    if_average = dict()

    input_model = models.load_model(input_model_path)

    x_train, y_train, l_train, _, _, _, vocab_dict = train_test_split(
        settings.data_source,
        settings.test_split,
        settings.sequence_length,
        vocabulary=vocab_dict)
    print('Loading data is over!')

    batches = batch_iter(x_train,
                         y_train,
                         l_train,
                         settings.batch_size,
                         num_epochs=1)

    y_prediction = input_model.predict(x_train)
    y_prediction = np.argmax(y_prediction, axis=1)
    print('Classification is over!')

    for batch_idx, batch in enumerate(batches):
        x_data, y_data, l_data = batch

        # Get the heatmap interpretation of the interpretation method
        interpretation_heatmap = get_interpretation('grad_cam', input_model,
                                                    x_data, y_data,
                                                    window_size,
                                                    target_layer_name)

        for sample_idx in range(len(x_data)):
            # real sample length, capped at the maximum sequence length
            real_length = min(l_data[sample_idx], max_length)
            # global position of this sample in x_train (assumes batch_iter preserves order
            # and yields full batches except possibly the last one)
            global_idx = batch_idx * settings.batch_size + sample_idx
            # get the interpretation feature information
            if y_data[sample_idx] == y_prediction[global_idx]:
                label = y_data[sample_idx]
                for j, w in enumerate(interpretation_heatmap[sample_idx][:real_length]):
                    if w > 0 and x_data[sample_idx][j] in vocab_dict:
                        word = vocab_dict[x_data[sample_idx][j]]
                        # accumulate the relevance score and the occurrence count per (label, word)
                        if_dict.setdefault(label, dict())
                        if_dict[label][word] = if_dict[label].get(word, 0) + w
                        if_count.setdefault(label, dict())
                        if_count[label][word] = if_count[label].get(word, 0) + 1

    print('Generating the interpretation features for all the data is over')
    # get the average importance values of the interpretation_features
    for category, sub_dict in if_dict.items():
        for word, value in sub_dict.items():
            if_average.setdefault(category, dict())
            if_average[category][word] = np.divide(value,
                                                   if_count[category][word])

    post_processed_if_dict = post_processing(if_average, if_count)

    with open(settings.post_processed_IF, 'wb') as f:
        pkl.dump(post_processed_if_dict, f, pkl.HIGHEST_PROTOCOL)
    with open(settings.IF, 'wb') as f:
        pkl.dump(if_dict, f, pkl.HIGHEST_PROTOCOL)
    with open(settings.count_IF, 'wb') as f:
        pkl.dump(if_count, f, pkl.HIGHEST_PROTOCOL)
    with open(settings.average_IF, 'wb') as f:
        pkl.dump(if_average, f, pkl.HIGHEST_PROTOCOL)
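
The function writes its four pickles to the paths configured in src.settings. A minimal usage sketch follows; the model path, window size, and convolution layer name are illustrative assumptions, not values taken from the snippets.

# Hypothetical call; model path, window_size and target_layer_name are assumptions.
generate_if_dict(settings.vocabulary_dict,
                 'runs/cnn_model.h5',            # assumed trained Keras model
                 window_size=3,                  # assumed CNN filter width used by Grad-CAM
                 target_layer_name='conv1d',     # assumed name of the target convolution layer
                 max_length=settings.sequence_length)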
Example #4
def generate_if_dict_de(vocab_dict,
                        input_model_path,
                        interpretation_method='grad*input'):
    """
    :param vocab_dict: vocabulary dictionary path
    :param input_model_path: the labels for the input data
    :param interpretation_method: {'saliency', 'grad*input', 'intgrad', 'elrp', 'deeplift'}
    """
    with DeepExplain(session=tf.compat.v1.keras.backend.get_session()) as de:

        # the summation of the relevance scores of each interpretation feature in each document
        if_dict = dict()
        # the count of each interpretation feature
        if_count = dict()
        # average relevance score for each interpretation feature
        if_average = dict()

        new_model = models.load_model(input_model_path)

        x_train, y_train, l_train, _, _, _, vocab_dict = train_test_split(
            settings.data_source,
            settings.test_split,
            settings.sequence_length,
            vocabulary=vocab_dict)

        # convert the index labels to one-hot format
        ys = np.zeros(shape=(len(y_train), settings.num_classes))
        for i, label in enumerate(y_train):
            ys[i, label] = 1

        # shape [None, 50, 128]
        embedding_tensor = new_model.layers[1].output

        # Get tensor before the final activation
        # shape [batch_size, num_classes]
        logits_tensor = new_model.layers[-2].output

        # iterate over the training set in steps of settings.batch_size
        for batch_idx in range(0, len(x_train), settings.batch_size):

            next_batch = batch_idx + settings.batch_size

            x_train_batch = x_train[batch_idx:next_batch]
            y_train_batch = y_train[batch_idx:next_batch]

            y_prediction = new_model.predict(x_train_batch)
            y_prediction = np.argmax(y_prediction, axis=1)

            # Interpretation ===========================================================================================

            # Evaluate the embedding tensor on the model input

            embedding_function = K.function([new_model.input],
                                            [embedding_tensor])
            embedding_output = embedding_function([x_train_batch])

            # Run DeepExplain with the embedding as input
            heat_map = de.explain(interpretation_method,
                                  logits_tensor * ys[batch_idx:next_batch],
                                  embedding_tensor, embedding_output[0])

            # sum the values for the embedding dimension to get the relevance value of each word
            interpretation_heatmap = np.sum(heat_map, axis=-1)
            interpretation_heatmap = np.maximum(interpretation_heatmap, 0)

            for sample_idx in range(len(x_train_batch)):
                # real (unpadded) length of this sample within the full training set
                sample_length = l_train[batch_idx + sample_idx]
                words = [
                    vocab_dict[w] if w in vocab_dict else '<PAD/>'
                    for w in x_train_batch[sample_idx][:sample_length]
                ]
                # normalize the per-word relevance scores to [0, 1]
                sample_heatmap = np.round(
                    interpretation_heatmap[sample_idx][:sample_length] /
                    (max(interpretation_heatmap[sample_idx][:sample_length]) + 0.00001), 2)

                # get the interpretation feature information
                if y_train_batch[sample_idx] == y_prediction[sample_idx]:
                    label = y_train_batch[sample_idx]
                    for j, w in enumerate(sample_heatmap):
                        if w > 0:
                            # accumulate the relevance score and the occurrence count per (label, word)
                            if_dict.setdefault(label, dict())
                            if_dict[label][words[j]] = if_dict[label].get(words[j], 0) + w
                            if_count.setdefault(label, dict())
                            if_count[label][words[j]] = if_count[label].get(words[j], 0) + 1

        # get the average importance values of the interpretation_features
        for category, sub_dict in if_dict.items():
            for word, value in sub_dict.items():
                if_average.setdefault(category, dict())
                if_average[category][word] = np.divide(
                    value, if_count[category][word])

        post_processed_if_dict = post_processing(if_average,
                                                 if_count,
                                                 alpha_threshold=0.2)

        with open(settings.post_processed_IF, 'wb') as f:
            pkl.dump(post_processed_if_dict, f, pkl.HIGHEST_PROTOCOL)
        with open(settings.IF, 'wb') as f:
            pkl.dump(if_dict, f, pkl.HIGHEST_PROTOCOL)
        with open(settings.count_IF, 'wb') as f:
            pkl.dump(if_count, f, pkl.HIGHEST_PROTOCOL)
        with open(settings.average_IF, 'wb') as f:
            pkl.dump(if_average, f, pkl.HIGHEST_PROTOCOL)
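
A minimal usage sketch, assuming a trained Keras model saved at a placeholder path; note that every call writes to the same settings.* pickle paths, so outputs should be renamed between interpretation methods.

# Hypothetical call; the model path is an assumption.
generate_if_dict_de(settings.vocabulary_dict,
                    'runs/cnn_model.h5',
                    interpretation_method='elrp')  # any of the methods listed in the docstring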
Example #5
def table_3(if_dict,
            vocab_dict_path,
            input_model_path,
            num_classes,
            max_length,
            threshold=0.3,
            interpretation_method='grad*input'):
    """
    MR Dataset
    Return the interpretation effectiveness using different PGTs on the MR dataset in terms of Kappa enter-agreement,
    interpretation precision and recall
    :param if_dict: the dictionary of the interpretation features
    :param vocab_dict_path: the vocabulary dictionary of the trained model
    :param input_model_path: the standard cnn model trained normally on the MR dataset
    :param num_classes: settings.num_classes of the dataset
    :param max_length: maximum sequence length of the documents from settings.sequence_length
    :param threshold: the threshold for the interpretation feature to be considered
    :param interpretation_method:
    """
    precision = 0
    recall = 0
    kappa = 0

    with open(if_dict, 'rb') as f:
        if_dict = pkl.load(f)

    x_train, y_train, l_train, _, _, _, vocab_dict = train_test_split(
        settings.data_source,
        settings.test_split,
        settings.sequence_length,
        vocabulary=vocab_dict_path)

    # Get the heatmap interpretation of the interpretation method
    interpretation_heatmap, y_prediction = get_interpretation_de(
        input_model_path,
        x_train,
        y_train,
        num_classes,
        interpretation_method=interpretation_method)

    #  real sample length
    real_length = [l if l < max_length else max_length for l in l_train]

    counter = 0
    for sample_idx in range(len(x_train)):
        if y_train[sample_idx] == y_prediction[sample_idx]:
            counter += 1
            a, b, c, d = 0, 0, 0, 0  # per-document TP, FN, FP, TN counters

            for word_idx in range(real_length[sample_idx]):

                # TODO: For MR Dataset only
                if y_train[sample_idx] == 0:
                    l1 = 1
                    l2 = 2
                else:
                    l1 = 4
                    l2 = 5

                word_relevance_score = 0
                token = x_train[sample_idx][word_idx]
                if token in vocab_dict and (l1 in if_dict or l2 in if_dict):  # y_train[sample_idx] in if_dict:
                    # TODO: no need for this 'if' or 'else' if we are using 5 classes
                    if l1 in if_dict:
                        if vocab_dict[token] in if_dict[l1]:
                            word_relevance_score = 1
                    elif l2 in if_dict:
                        if vocab_dict[token] in if_dict[l2]:
                            word_relevance_score = 1

                # confusion matrix between the PGT relevance and the thresholded heatmap
                heat = interpretation_heatmap[sample_idx][word_idx]
                if word_relevance_score == 1 and heat >= threshold:
                    a += 1  # TP
                elif word_relevance_score == 1 and heat < threshold:
                    b += 1  # FN
                elif word_relevance_score == 0 and heat >= threshold:
                    c += 1  # FP
                elif word_relevance_score == 0 and heat < threshold:
                    d += 1  # TN

            # Precision and Recall
            precision += a / ((a + c) + 1e-10)
            recall += a / ((a + b) + 1e-10)
            # Kappa
            po = (a + d) / (a + b + c + d)
            p1 = ((a + b) / (a + b + c + d)) * ((a + c) / (a + b + c + d))
            p0 = ((d + b) / (a + b + c + d)) * ((d + c) / (a + b + c + d))
            pe = p1 + p0
            kappa += (po - pe) / ((1 - pe) + 1e-10)

    precision /= counter
    recall /= counter
    kappa /= counter

    print(
        f'Interpretation precision: {precision}, Interpretation recall: {recall}, Kappa: {kappa}'
    )
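
A minimal sketch of how table_3 might be invoked for the MR dataset; only the model path is a placeholder assumption, the remaining arguments come from settings and the pickles written by the generators above.

# Hypothetical call; the model path is an assumption.
table_3(settings.post_processed_IF,
        settings.vocabulary_dict,
        'runs/cnn_model.h5',               # assumed standard CNN trained on the MR dataset
        settings.num_classes,
        settings.sequence_length,
        threshold=0.3,
        interpretation_method='grad*input')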