def _test_outputs_gradients(model):
    x, y, _ = make_data(K.int_shape(model.input), model.layers[2].units)
    name = model.layers[1].name
    grads_all  = get_gradients(model, name, x, y, mode='outputs')
    grads_last = get_gradients(model, 2,    x, y, mode='outputs')

    kwargs1 = dict(n_rows=None, show_xy_ticks=[0, 0], show_borders=True,
                   max_timesteps=50, title_mode='grads')
    kwargs2 = dict(n_rows=2, show_xy_ticks=[1, 1], show_borders=False,
                   max_timesteps=None)

    features_1D(grads_all[0],  **kwargs1)
    features_1D(grads_all[:1], **kwargs1)
    features_1D(grads_all,     **kwargs2)
    features_2D(grads_all[0], norm=(-.01, .01), show_colorbar=True, **kwargs1)
    features_2D(grads_all,    norm=None, reflect_half=True, **kwargs2)
    features_0D(grads_last, marker='o', color=None, title_mode='grads')
    features_0D(grads_last, marker='x', color='blue', ylims=(-.1, .1))
    features_hist(grads_all, bins=100, xlims=(-.01, .01), title="Outs hists")
    features_hist(grads_all, bins=100, n_rows=4)
    print('\n')  # improve separation
def _get_data(model, _id, mode, input_data, labels, omit_names, kw):
    if mode == 'weights':
        data = get_weights(model, _id, omit_names, as_dict=True)
    elif 'gradients' in mode:
        if mode in ('gradients', 'gradients:outputs'):
            data = get_gradients(model, _id, input_data, labels,
                                 mode='outputs', as_dict=True)
        else:
            data = get_gradients(model, _id, input_data, labels,
                                 mode='weights', as_dict=True)
    elif mode == 'outputs':
        data = get_outputs(model, _id, input_data, as_dict=True)

    data_flat = [x.ravel() for x in data.values()]
    return data_flat, list(data)
def test_multi_io():
    def _make_multi_io_model():
        ipt1 = Input((40, 8))
        ipt2 = Input((40, 16))
        ipts = concatenate([ipt1, ipt2])
        out1 = GRU(6,  return_sequences=True)(ipts)
        out2 = GRU(12, return_sequences=True)(ipts)

        model = Model([ipt1, ipt2], [out1, out2])
        model.compile('adam', 'mse')
        return model

    def _make_multi_io_data():
        x1 = np.random.randn(8, 40, 8)
        x2 = np.random.randn(8, 40, 16)
        y1 = np.random.randn(8, 40, 6)
        y2 = np.random.randn(8, 40, 12)
        return x1, x2, y1, y2

    model = _make_multi_io_model()
    x1, x2, y1, y2 = _make_multi_io_data()

    grads = get_gradients(model, '*', [x1, x2], [y1, y2])
    outs  = get_outputs(model, '*', [x1, x2])

    # outs/grads cover, in order: concatenate (8 + 16 = 24 channels),
    # then each GRU's output
    assert outs[0].shape == grads[0].shape == (8, 40, 24)
    assert outs[1].shape == grads[1].shape == (8, 40, 6)
    assert outs[2].shape == grads[2].shape == (8, 40, 12)

    cprint("\n<< MULTI_IO TESTS PASSED >>\n", 'green')
def viz_outs_grads(model, idx=1):
    x, y, _ = make_data(K.int_shape(model.input), model.layers[2].units)
    grads = get_gradients(model, idx, x, y)
    kws = dict(n_rows=8, title='grads')

    features_1D(grads[0], show_borders=False, **kws)
    features_2D(grads, norm=(-1e-4, 1e-4), **kws)
def test_errors():  # test Exception cases
    units = 6
    batch_shape = (8, 100, 2 * units)

    reset_seeds(reset_graph_with_backend=K)
    model = make_model(GRU, batch_shape, activation='relu',
                       recurrent_dropout=0.3, IMPORTS=IMPORTS)
    x, y, sw = make_data(batch_shape, units)
    model.train_on_batch(x, y, sw)
    grads = get_gradients(model, 1, x, y)
    grads_4D = np.expand_dims(grads, -1)

    from see_rnn.inspect_gen import get_layer, _make_grads_fn

    pass_on_error(features_0D, grads)
    pass_on_error(features_0D, grads_4D)
    pass_on_error(features_1D, grads_4D)
    pass_on_error(features_2D, grads_4D)
    pass_on_error(features_2D, grads)
    pass_on_error(get_gradients, model, 1, x, y, mode='cactus')
    pass_on_error(get_gradients, model, 1, x, y, layer=model.layers[1])
    pass_on_error(_make_grads_fn, model, model.layers[1], mode='banana')
    pass_on_error(features_hist, grads[:, :4, :3], po='tato')
    pass_on_error(features_hist_v2, grads[:, :4, :3], po='tato')
    pass_on_error(get_layer, model)
    pass_on_error(get_layer, model, 'capsule')
    pass_on_error(rnn_heatmap, model, 1, input_data=x, labels=y,
                  mode='coffee')
    pass_on_error(rnn_heatmap, model, 1, co='vid')
    pass_on_error(rnn_heatmap, model, 1, norm=(0, 1, 2))
    pass_on_error(rnn_heatmap, model, 1, mode='grads')
    pass_on_error(rnn_histogram, model, 1, norm=None)
    pass_on_error(rnn_heatmap, model, layer_index=9001)
    pass_on_error(features_0D, grads, cake='lie')
    pass_on_error(features_1D, grads, pup='not just any')
    pass_on_error(features_2D, grads, true=False)

    outs = list(get_outputs(model, 1, x, as_dict=True).values())
    pass_on_error(rnn_histogram, model, 1, data=outs)
    pass_on_error(rnn_histogram, model, 1, data=[1])
    pass_on_error(rnn_histogram, model, 1, data=[[1]])
    pass_on_error(features_hist, grads, co='vid')
    pass_on_error(features_0D, grads, configs={'x': {}})
    pass_on_error(features_1D, grads, configs={'x': {}})
    pass_on_error(features_2D, grads, configs={'x': {}})
    pass_on_error(features_hist, grads, configs={'x': {}})
    pass_on_error(features_hist_v2, grads, configs={'x': {}})

    cprint("\n<< EXCEPTION TESTS PASSED >>\n", 'green')
    assert True
def test_none_gradients():
    n_classes = 4
    batch_size = 16

    def _make_softmax_model():
        ipt = Input(batch_shape=(batch_size, 8))
        x   = Dense(n_classes)(ipt)
        out = Activation('softmax')(x)

        model = Model(ipt, out)
        model.compile('adam', 'categorical_crossentropy')
        return model

    def _make_data():
        class_labels = np.random.randint(0, n_classes, batch_size)
        y = np.eye(n_classes)[class_labels]
        x = np.random.randn(batch_size, 8)
        return x, y

    model = _make_softmax_model()
    x, y = _make_data()
    model.train_on_batch(x, y)

    get_gradients(model, '*', x, y)
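# Context sketch, not part of the original suite: in TF2, gradients of
# tensors or variables disconnected from the loss come back as `None` --
# the kind of entry the `'*'` fetch above must tolerate. A minimal,
# self-contained example (hypothetical helper name):
def _demo_none_gradient():
    import tensorflow as tf

    v_used   = tf.Variable(2.0)
    v_unused = tf.Variable(3.0)
    with tf.GradientTape() as tape:
        loss = v_used ** 2  # `v_unused` plays no part in the loss
    grads = tape.gradient(loss, [v_used, v_unused])
    return grads  # [tf.Tensor(4.0, ...), None]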
def test_misc():  # test miscellaneous functionalities
    units = 6
    batch_shape = (8, 100, 2 * units)

    reset_seeds(reset_graph_with_backend=K)
    model = make_model(GRU, batch_shape, activation='relu',
                       recurrent_dropout=0.3, IMPORTS=IMPORTS)
    x, y, sw = make_data(batch_shape, units)
    model.train_on_batch(x, y, sw)

    weights_norm(model, 'gru', omit_names='bias', verbose=1)
    weights_norm(model, ['gru', 1, (1, 1)], norm_fn=np.abs)
    stats = weights_norm(model, 'gru')
    weights_norm(model, 'gru', _dict=stats)

    grads = get_gradients(model, 1, x, y)
    get_gradients(model, 1, x, y, as_dict=True)
    get_gradients(model, ['gru', 1], x, y)
    get_outputs(model, ['gru', 1], x)

    features_1D(grads, subplot_samples=True, tight=True, borderwidth=2,
                share_xy=False)
    with tempdir() as dirpath:
        features_0D(grads[0], savepath=os.path.join(dirpath, 'img.png'))
    with tempdir() as dirpath:
        features_1D(grads[0], subplot_samples=True, annotations=[1, 'pi'],
                    savepath=os.path.join(dirpath, 'img.png'))
    features_2D(grads.T, n_rows=1.5, tight=True, borderwidth=2)
    with tempdir() as dirpath:
        features_2D(grads.T[:, :, 0], norm='auto',
                    savepath=os.path.join(dirpath, 'img.png'))
    with tempdir() as dirpath:
        features_hist(grads, show_borders=False, borderwidth=1,
                      annotations=[0], show_xy_ticks=[0, 0],
                      share_xy=(1, 1), title="grads",
                      savepath=os.path.join(dirpath, 'img.png'))
    with tempdir() as dirpath:
        features_hist_v2(list(grads[:, :4, :3]), colnames=list('abcd'),
                         show_borders=False, xlims=(-.01, .01), ylim=100,
                         borderwidth=1, show_xy_ticks=[0, 0],
                         side_annot='row', share_xy=True, title="Grads",
                         savepath=os.path.join(dirpath, 'img.png'))
    features_hist(grads, center_zero=True, xlims=(-1, 1), share_xy=0)
    features_hist_v2(list(grads[:, :4, :3]), center_zero=True,
                     xlims=(-1, 1), share_xy=(False, False))
    with tempdir() as dirpath:
        rnn_histogram(model, 1, show_xy_ticks=[0, 0], equate_axes=2,
                      savepath=os.path.join(dirpath, 'img.png'))
    rnn_histogram(model, 1, equate_axes=False,
                  configs={'tight': dict(left=0, right=1),
                           'plot':  dict(color='red'),
                           'title': dict(fontsize=14)})
    rnn_heatmap(model, 1, cmap=None, normalize=True, show_borders=False)
    rnn_heatmap(model, 1, cmap=None, norm='auto', absolute_value=True)
    rnn_heatmap(model, 1, norm=None)
    with tempdir() as dirpath:
        rnn_heatmap(model, 1, norm=(-.004, .004),
                    savepath=os.path.join(dirpath, 'img.png'))

    hist_clipped(grads, peaks_to_clip=2)
    _, ax = plt.subplots(1, 1)
    hist_clipped(grads, peaks_to_clip=2, ax=ax, annot_kw=dict(fontsize=15))

    get_full_name(model, 'gru')
    get_full_name(model, 1)
    pass_on_error(get_full_name, model, 'croc')

    get_weights(model, 'gru', as_dict=False)
    get_weights(model, 'gru', as_dict=True)
    get_weights(model, 'gru/bias')
    get_weights(model, ['gru', 1, (1, 1)])
    pass_on_error(get_weights, model, 'gru/goo')

    get_weights(model, '*')
    get_gradients(model, '*', x, y)
    get_outputs(model, '*', x)

    from see_rnn.utils import _filter_duplicates_by_keys
    keys, data = _filter_duplicates_by_keys(list('abbc'), [1, 2, 3, 4])
    assert keys == ['a', 'b', 'c']
    assert data == [1, 2, 4]
    keys, data = _filter_duplicates_by_keys(
        list('abbc'), [1, 2, 3, 4], [5, 6, 7, 8])
    assert keys == ['a', 'b', 'c']
    assert data[0] == [1, 2, 4] and data[1] == [5, 6, 8]

    from see_rnn.inspect_gen import get_layer, detect_nans
    get_layer(model, 'gru')
    get_rnn_weights(model, 1, concat_gates=False, as_tensors=True)

    rnn_heatmap(model, 1, input_data=x, labels=y, mode='weights')
    _test_prefetched_data(model)

    # test NaN/Inf detection
    nan_txt = detect_nans(np.array([1] * 9999 + [np.nan])).replace('\n', ' ')
    print(nan_txt)  # case: print as quantity
    data = np.array([np.nan, np.inf, -np.inf, 0])
    print(detect_nans(data, include_inf=True))
    print(detect_nans(data, include_inf=False))
    data = np.array([np.inf, 0])
    print(detect_nans(data, include_inf=True))
    detect_nans(np.array([0]))

    K.set_value(model.optimizer.lr, 1e12)
    train_model(model, iterations=10)
    rnn_histogram(model, 1)
    rnn_heatmap(model, 1)

    del model
    reset_seeds(reset_graph_with_backend=K)

    # test SimpleRNN & other
    _model = make_model(SimpleRNN, batch_shape, units=128, use_bias=False,
                        IMPORTS=IMPORTS)
    train_model(_model, iterations=1)  # TF2-Keras-Graph bug workaround
    rnn_histogram(_model, 1)  # test _pretty_hist
    K.set_value(_model.optimizer.lr, 1e50)  # force NaNs
    train_model(_model, iterations=20)
    rnn_heatmap(_model, 1)
    data = get_rnn_weights(_model, 1)
    rnn_heatmap(_model, 1, input_data=x, labels=y, data=data)
    os.environ["TF_KERAS"] = '0'
    get_rnn_weights(_model, 1, concat_gates=False)
    del _model

    assert True
    cprint("\n<< MISC TESTS PASSED >>\n", 'green')
def viz_outs_grads_last(model, idx=2):  # return_sequences=False layer
    x, y, _ = make_data(K.int_shape(model.input), model.layers[2].units)
    grads = get_gradients(model, idx, x, y)
    features_0D(grads)
def compute_gradient_norm(self, input_data, labels, sample_weight=None,
                          learning_phase=0, _id='*', mode='weights',
                          norm_fn=(np.sqrt, np.square), scope='local'):
    """Computes gradients w.r.t. layer weights or outputs per `_id`, and
    returns norm according to `norm_fn` and `scope`.

    Arguments:
        input_data: np.ndarray / list[np.ndarray] / supported formats
            Data w.r.t. which loss is to be computed for the gradient.
            List of arrays for multi-input networks. "Supported formats"
            is any valid input to `model`.
        labels: np.ndarray / list[np.ndarray] / supported formats
            Labels w.r.t. which loss is to be computed for the gradient.
        sample_weight: np.ndarray / list[np.ndarray] / supported formats
            kwarg to `model.fit()`, etc., weighting individual sample losses.
        learning_phase: bool / int[bool]
            - 1: use model in train mode
            - 0: use model in inference mode
        _id: str / int / list[str/int]
            - int -> idx; str -> name
            - idx: int. Index of layer to fetch, via `model.layers[idx]`.
            - name: str. Name of layer (full or substring) to be fetched.
              Returns earliest match if multiple found.
            - list[str/int] -> treat each str element as name, int as idx.
              Ex: `['gru', 2]` gets (e.g.) weights of first layer with name
              substring 'gru', then of layer w/ idx 2.
            - `'*'` (wildcard) -> get (e.g.) outputs of all layers (except
              input) with 'output' attribute.
        mode: str in ('weights', 'outputs', 'gradients:weights',
              'gradients:outputs')
            Whether to fetch layer weights, outputs, or gradients (w.r.t.
            outputs or weights).
        norm_fn: (function, function) / function
            Norm function(s) to apply to gradient arrays when gathering.
            `(np.sqrt, np.square)` for L2-norm, `np.abs` for L1-norm.
            Computed as: `outer_fn(sum(inner_fn(x) for x in data))`, where
            `outer_fn, inner_fn = norm_fn` if `norm_fn` is list/tuple, and
            `inner_fn = norm_fn` with `outer_fn = lambda x: x` otherwise.
        scope: str in ('local', 'global')
            Whether to apply `norm_fn` to individual gradient arrays, or
            to their sum.

    Returns:
        Gradient norm(s). List of float if `scope == 'local'` (one norm per
        gradient array), else float
        (`outer_fn(sum(sum(inner_fn(g)) for g in grads))`).

    TensorFlow optimizers do gradient clipping according to the `clipnorm`
    setting by comparing individual weights' L2-norms against `clipnorm`,
    and rescaling if exceeding. These L2 norms can be obtained using
    `norm_fn=(np.sqrt, np.square)` with `scope == 'local'` and
    `mode='weights'`. See:

        - `tensorflow.python.keras.optimizer_v2.optimizer_v2._clip_gradients`
        - `keras.optimizers.clip_norm`
        - `tensorflow.python.ops.clip_ops.clip_by_norm`
    """
    if scope not in ('local', 'global'):
        raise ValueError("`scope` must be one of: 'local', 'global' "
                         "(got '%s')" % scope)

    if isinstance(norm_fn, (tuple, list)):
        outer_fn, inner_fn = norm_fn
    else:
        outer_fn, inner_fn = lambda x: x, norm_fn

    sample_weight = _validate_sample_weight(self.model, sample_weight)
    grads = get_gradients(self.model, _id, input_data, labels, sample_weight,
                          learning_phase, mode=mode, as_dict=False)

    inner_sum = [np.sum(inner_fn(g)) for g in grads]
    if scope == 'local':
        # same as e.g. [np.sqrt(np.sum(np.square(g))) for g in grads],
        # but faster
        return outer_fn(inner_sum)
    else:
        return outer_fn(np.sum(inner_sum))
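# Hedged usage sketch, not from the original file: `insp` stands for an
# instance of the class defining `compute_gradient_norm`, holding a compiled
# model as `insp.model`; `x`, `y` are a valid input/label batch. These names
# and the helper itself are assumptions for illustration.
def _demo_compute_gradient_norm(insp, x, y):
    # Per-weight L2 norms -- what TF optimizers compare against `clipnorm`:
    l2_norms = insp.compute_gradient_norm(x, y)  # defaults: 'local', 'weights'
    # Per-weight L1 norms: a single function means `outer_fn` is identity
    l1_norms = insp.compute_gradient_norm(x, y, norm_fn=np.abs)

    # The local L2 norms should match the manual computation per docstring:
    # `outer_fn(sum(inner_fn(g)))` for each gradient array `g`
    grads  = get_gradients(insp.model, '*', x, y, mode='weights')
    manual = [np.sqrt(np.sum(np.square(g))) for g in grads]
    return l2_norms, l1_norms, manual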