예제 #1
0
# In[9]:

import numpy as np
import utils.preprocess as pre
reload(pre)


def _encode_shuffle_expand(X_raw, y_raw, batch_size, n_chars):
    """One-hot encode a (X_raw, y_raw) char pair, shuffle them in lockstep,
    and add a singleton time axis to X.

    Returns (X, y) where X has shape (n, 1, n_chars) and y has shape
    (n, n_chars).  The SAME index permutation is applied to both arrays so
    input/target pairs stay aligned; `pre.shuffle_indices` presumably also
    trims the length to a multiple of batch_size -- TODO confirm against
    utils.preprocess.
    """
    # ENCODES: one-hot over the full character vocabulary.
    X = pre.one_hot_encode_seq(X_raw, n_chars=n_chars)
    y = pre.one_hot_encode_seq(y_raw, n_chars=n_chars)
    # SHUFFLES: one shared permutation keeps X/y rows paired.
    indices = pre.shuffle_indices(len(X_raw), batch_size)
    # EXPANDS: insert a length-1 time axis for the (stateful) LSTM input.
    return X[indices, np.newaxis, :], y[indices]


# Preprocesses training
# SPLITS: next-char prediction -- inputs are all chars but the last,
# targets are the same sequence shifted left by one.
X_train_raw = raw_train_sequence[:-1]
y_train_raw = raw_train_sequence[1:]
X_train, y_train = _encode_shuffle_expand(X_train_raw, y_train_raw,
                                          BATCH_SIZE, len(int2char))

# Preprocesses testing
# SPLITS
X_test_raw = raw_test_sequence[:-1]
y_test_raw = raw_test_sequence[1:]
X_test, y_test = _encode_shuffle_expand(X_test_raw, y_test_raw,
                                        BATCH_SIZE, len(int2char))
예제 #2
0
    def run(self,
            y_feat_frame,
            X,
            y,
            shuf_batch_size=None,
            sample_size=25,
            random_min=-1,
            random_max=1,
            one_hot=True):
        """
            BEWARE: ONLY WORKS WITM MYLSTM!
        """
        def stratified_acc(model):
            self.model.reset_states()
            if shuf_batch_size is not None:
                pred = model.predict(X, batch_size=shuf_batch_size, verbose=0)
            else:
                pred = model.predict(X, verbose=0)

            scores = {}
            for y_ff_val in y_ff_values:

                sub_y = y[y_ff == y_ff_val, ...]
                sub_pred = pred[y_ff == y_ff_val, ...]

                if one_hot:
                    sub_y = np.argmax(sub_y, axis=1)
                    sub_pred = np.argmax(sub_pred, axis=1)
                    f1_avg = 'macro'
                else:
                    f1_avg = 'binary'

                scores[y_ff_val] = f1_score(sub_y, sub_pred, average=f1_avg)

            return scores

        if not X.shape[0] == y.shape[0]:
            raise ValueError('Incorrect number of rows')

        # Run the feat function on labels
        ff_names, ff_val = y_feat_frame.data

        # For every feature...
        all_scores = {}
        for ff_index, ff_name in enumerate(ff_names):

            print '*** Testing the neurons for feature', ff_name

            # Extracts vector and distinct values
            y_ff = ff_val[:, ff_index]
            y_ff_values = list(set(y_ff.tolist()))

            # shuffles the lables to match the order in y
            if shuf_batch_size is not None:
                y_ff_idx = preprocess.shuffle_indices(ff_val.shape[0],
                                                      shuf_batch_size)
                y_ff = y_ff[y_ff_idx]

            # Gets the neurons and baseline neurons
            neurons = self.attrib_neurons[ff_name]
            baseline_neurons = self.not_attrib_neurons[ff_name]

            # Gets original predictions
            print '* Computing original accuracy'
            scores = [stratified_acc(self.model)]
            orig_scores = flatten_dict_array(scores)

            # Hacks the baseline neurons
            print '* Computing baseline accuracies'
            scores = []
            for i in range(sample_size):
                print 'Round', i
                if len(baseline_neurons) > len(neurons):
                    to_hack = random.sample(baseline_neurons, len(neurons))
                else:
                    to_hack = baseline_neurons
                hacked_model = control.randomize_model(self.model, to_hack)
                acc = stratified_acc(hacked_model)
                scores.append(acc)
            base_scores = flatten_dict_array(scores)

            # Hacks the candidate neurons
            print '* Computing candidate accuracies'
            scores = []
            for i in range(sample_size):
                print 'Round', i
                hacked_model = control.randomize_model(self.model, neurons)
                acc = stratified_acc(hacked_model)
                scores.append(acc)
            cand_scores = flatten_dict_array(scores)

            all_scores[ff_name] = {
                'original': orig_scores,
                'candidate': cand_scores,
                'baseline': base_scores
            }

            print all_scores[ff_name]

            self.prediction_scores = all_scores

        return all_scores