import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from keras.models import Model
from skimage.transform import resize

# The dataset helpers (load_dataset_at, calculate_dataset_metrics, cutoff_choice,
# cutoff_sequence, MAX_SEQUENCE_LENGTH_LIST) and the Keras helpers (build_function,
# get_outputs, plot_dataset) are assumed to be defined or imported elsewhere in
# this module.


def visualize_context_vector(model: Model, dataset_id, dataset_prefix, cutoff=None, limit=None,
                             normalize_timeseries=False, visualize_sequence=True,
                             visualize_classwise=False):
    """
    Visualize the Context Vector of the Attention LSTM.

    Loads the dataset identified by `dataset_id`, extracts the attention weights of
    the first AttentionLSTM layer of `model`, and either plots the attention-weighted
    input sequences or a bar chart of the averaged attention vector itself.
    """
    X_train, y_train, X_test, y_test, is_timeseries = load_dataset_at(
        dataset_id, normalize_timeseries=normalize_timeseries)
    _, sequence_length = calculate_dataset_metrics(X_train)

    if sequence_length != MAX_SEQUENCE_LENGTH_LIST[dataset_id]:
        if cutoff is None:
            choice = cutoff_choice(dataset_id, sequence_length)
        else:
            assert cutoff in ['pre', 'post'], 'Cutoff parameter value must be either "pre" or "post"'
            choice = cutoff

        if choice not in ['pre', 'post']:
            return
        else:
            X_train, X_test = cutoff_sequence(X_train, X_test, choice, dataset_id, sequence_length)

    attn_lstm_layer = [(i, layer) for (i, layer) in enumerate(model.layers)
                       if layer.__class__.__name__ == 'AttentionLSTM']

    if len(attn_lstm_layer) == 0:
        raise AttributeError('Provided model does not have an Attention layer')
    else:
        i, attn_lstm_layer = attn_lstm_layer[0]  # use first attention lstm layer only

    attn_lstm_layer.return_attention = True

    model.layers[i] = attn_lstm_layer
    model.load_weights("./weights/%s_weights.h5" % dataset_prefix)

    attention_output = model.layers[i].call(model.input)

    eval_functions = build_function(model, attn_lstm_layer.name, outputs=[attention_output])
    attention_vectors = []

    for i in range(X_train.shape[0]):
        activations = get_outputs(model, X_train[i, :, :][np.newaxis, ...], eval_functions,
                                  verbose=False)[0]

        attention_vector = np.sum(activations, axis=1).squeeze()
        attention_vectors.append(attention_vector)

    attention_vectors = np.array(attention_vectors)
    attention_vector_final = np.mean(attention_vectors, axis=0)

    if visualize_sequence:
        # plot in detail the part of the input sequence that is attended to
        attention_vector_final = attention_vector_final.reshape((1, attention_vector_final.shape[0]))

        X_train_attention = np.zeros_like(X_train)
        X_test_attention = np.zeros_like(X_test)

        for i in range(X_train.shape[0]):
            X_train_attention[i, :, :] = attention_vector_final * X_train[i, :, :]

        for i in range(X_test.shape[0]):
            X_test_attention[i, :, :] = attention_vector_final * X_test[i, :, :]

        plot_dataset(dataset_id, seed=1, limit=limit, cutoff=cutoff,
                     normalize_timeseries=normalize_timeseries,
                     plot_data=(X_train, y_train, X_test, y_test,
                                X_train_attention, X_test_attention),
                     type='Context', plot_classwise=visualize_classwise)
    else:
        # plot only the attention chart
        train_df = pd.DataFrame({'attention (%)': attention_vector_final},
                                index=range(attention_vector_final.shape[0]))

        train_df.plot(kind='bar',
                      title='Attention Mechanism (Train) as a function of input dimensions.')

        plt.show()
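# Example usage (a minimal sketch, not part of the original module): assumes a
# hypothetical `build_attention_model()` factory that returns an Attention LSTM-FCN
# model for the chosen dataset, with trained weights already saved under
# "./weights/<dataset_prefix>_weights.h5":
#
#     model = build_attention_model()  # hypothetical helper
#     visualize_context_vector(model, dataset_id=0, dataset_prefix='some_dataset',
#                              visualize_sequence=True, visualize_classwise=False)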
def visualize_context_vector(model: Model, series_values, labels, run_prefix, cutoff=None,
                             limit=None, val_split=1 / 3, random_state=0,
                             visualize_sequence=True, visualize_classwise=False):
    """
    Visualize the Context Vector of the Attention LSTM.

    Args:
        model: an Attention LSTM-FCN Model.
        series_values: 3D Numpy array of time series samples, indexed by
            sample along the first axis.
        labels: Numpy array of class labels for `series_values`.
        run_prefix: Name of the run. Used to locate the saved weights
            ("./weights/<run_prefix>_weights.h5").
        cutoff: Optional integer which slices off the first `cutoff` timesteps
            from the input signal (forwarded to `plot_dataset`).
        limit: Number of samples to be visualized in one plot.
        val_split: Fraction of the (shuffled) samples held out as the test split.
        random_state: Seed for the shuffle applied before splitting.
        visualize_sequence: Bool flag, whether to visualize the sequence
            attended to by the Context Vector or just the Context Vector itself.
        visualize_classwise: Bool flag. Whether to visualize the samples
            separated by class. When doing so, `limit` is multiplied by the
            number of classes, so it is better to set `limit` to 1 in such cases.
    """
    inds = np.arange(series_values.shape[0])
    np.random.seed(random_state)
    np.random.shuffle(inds)

    series_values = series_values[inds]
    labels = labels[inds]

    val_split = int(val_split * series_values.shape[0])
    X_train, y_train = series_values[:-val_split], labels[:-val_split]
    X_test, y_test = series_values[-val_split:], labels[-val_split:]

    sequence_length = series_values.shape[1]

    attn_lstm_layer = [(i, layer) for (i, layer) in enumerate(model.layers)
                       if layer.__class__.__name__ == 'AttentionLSTM']

    if len(attn_lstm_layer) == 0:
        raise AttributeError('Provided model does not have an Attention layer')
    else:
        i, attn_lstm_layer = attn_lstm_layer[0]  # use first attention lstm layer only

    attn_lstm_layer.return_attention = True

    model.layers[i] = attn_lstm_layer
    model.load_weights("./weights/%s_weights.h5" % run_prefix)

    attention_output = model.layers[i].call(model.input)

    eval_functions = build_function(model, attn_lstm_layer.name, outputs=[attention_output])

    train_attention_vectors = []
    test_attention_vectors = []

    output_shape = [X_train.shape[-1], 1, 1]

    for i in range(X_train.shape[0]):
        activations = get_outputs(model, X_train[i, :, :][np.newaxis, ...], eval_functions,
                                  verbose=False)[0]

        # rescale the attention weights to [-1, 1] and resize them to the input length
        attention_vector = activations.reshape((-1, 1, 1))

        attention_vector = (attention_vector - attention_vector.min()) / (
            attention_vector.max() - attention_vector.min())
        attention_vector = (attention_vector * 2.) - 1.

        attention_vector = resize(attention_vector, output_shape, mode='reflect',
                                  anti_aliasing=True)
        attention_vector = attention_vector.reshape([1, -1])

        train_attention_vectors.append(attention_vector)

    for i in range(X_test.shape[0]):
        activations = get_outputs(model, X_test[i, :, :][np.newaxis, ...], eval_functions,
                                  verbose=False)[0]

        # rescale the attention weights to [-1, 1] and resize them to the input length
        attention_vector = activations.reshape((-1, 1, 1))

        attention_vector = (attention_vector - attention_vector.min()) / (
            attention_vector.max() - attention_vector.min())
        attention_vector = (attention_vector * 2.) - 1.

        attention_vector = resize(attention_vector, output_shape, mode='reflect',
                                  anti_aliasing=True)
        attention_vector = attention_vector.reshape([1, -1])

        test_attention_vectors.append(attention_vector)

    train_attention_vectors = np.array(train_attention_vectors)
    test_attention_vectors = np.array(test_attention_vectors)

    print("Train Attention Vectors Shape :", train_attention_vectors.shape)
    print("Test Attention Vectors Shape :", test_attention_vectors.shape)

    if visualize_sequence:
        # plot in detail the part of the input sequence that is attended to
        X_train_attention = train_attention_vectors * X_train
        X_test_attention = test_attention_vectors * X_test

        plot_dataset(series_values, labels, run_prefix, val_split=val_split, seed=1, limit=limit,
                     cutoff=cutoff,
                     plot_data=(X_train, y_train, X_test, y_test,
                                X_train_attention, X_test_attention),
                     type='Context', plot_classwise=visualize_classwise)
    else:
        # plot only the attention chart of a randomly chosen training sample
        choice = np.random.randint(0, train_attention_vectors.shape[0])

        train_df = pd.DataFrame({'attention (%)': train_attention_vectors[choice, 0]},
                                index=range(train_attention_vectors.shape[-1]))

        train_df.plot(kind='bar',
                      title='Attention Mechanism (Train) as a function of input dimensions. '
                            'Class = %d' % (y_train[choice]))

        plt.show()
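# Example usage (a minimal sketch, not part of the original module): `series_values`
# and `labels` are assumed to be Numpy arrays already shaped for the model's input,
# and `model` is an Attention LSTM-FCN whose trained weights were saved under
# "./weights/<run_prefix>_weights.h5" by a previous run:
#
#     visualize_context_vector(model, series_values, labels, run_prefix='my_run',
#                              val_split=1 / 3, random_state=0,
#                              visualize_sequence=True, visualize_classwise=False)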