def test_gru_cell():
    n_inputs = 3
    n_units = 4
    batch_size = 1
    inputs = tx.Input(n_units=n_inputs)

    gru0 = tx.GRUCell(inputs, n_units,
                      activation=tf.tanh,
                      gate_activation=tf.sigmoid)

    # reset_after=False applies the reset gate before the matrix multiplication
    # and uses a single bias, matching the tx.GRUCell convention (the
    # cuDNN-compatible Keras configuration would instead use reset_after=True
    # with separate recurrent biases)
    gru1 = GRUCell(n_units,
                   activation='tanh',
                   recurrent_activation='sigmoid',
                   reset_after=False,
                   implementation=1,
                   use_bias=True)

    assert not hasattr(gru1, "kernel")

    state0 = [s() for s in gru0.previous_state]

    # get_initial_state from keras returns either a tuple or a single state
    # (see test_rnn_cell), but the __call__ API requires an iterable
    state1 = gru1.get_initial_state(inputs, batch_size=1)
    assert tx.tensor_equal(state1, state0[0])

    inputs.value = tf.ones([batch_size, n_inputs])

    res1 = gru1(inputs, state0)
    res1_ = gru1(inputs, state0)

    for r1, r2 in zip(res1, res1_):
        assert tx.tensor_equal(r1, r2)

    # the only difference is that the keras kernels are fused together
    kernel = tf.concat([w.weights.value() for w in gru0.layer_state.w], axis=-1)
    recurrent_kernel = tf.concat([u.weights for u in gru0.layer_state.u], axis=-1)
    bias = tf.concat([w.bias for w in gru0.layer_state.w], axis=-1)

    assert tx.same_shape(kernel, gru1.kernel)
    assert tx.same_shape(recurrent_kernel, gru1.recurrent_kernel)
    assert tx.same_shape(bias, gru1.bias)

    gru1.kernel = kernel
    gru1.recurrent_kernel = recurrent_kernel
    gru1.bias = bias

    res2 = gru1(inputs, state0)
    for i in range(len(res1)):
        assert not tx.tensor_equal(res1[i], res2[i])

    res0 = gru0()
    # res0_ = gru0.state[0]()
    assert tx.tensor_equal(res0, res2[0])
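
# Illustration only (not part of the original test suite): a minimal sketch of
# the kernel fusion that test_gru_cell relies on when copying weights into the
# Keras cell. Keras keeps one fused kernel of shape [n_inputs, 3 * n_units],
# while tx.GRUCell keeps one kernel per gate; concatenating the per-gate
# kernels along the last axis reproduces the fused layout (the Keras gate
# order z, r, h is assumed here).
def fuse_gru_kernels(w_z, w_r, w_h):
    # each w_* has shape [n_inputs, n_units] -> result is [n_inputs, 3 * n_units]
    return tf.concat([w_z, w_r, w_h], axis=-1)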

def test_gradient_sparse_var():
    """ see https://www.tensorflow.org/beta/guide/effective_tf2
    """
    target = tf.constant([[1., 0., 0.], [1., 0., 0.]])

    v = tf.Variable([0.5, 0.5])

    x = tx.Lambda([],
                  fn=lambda _: tf.SparseTensor([[0, 0], [1, 1]], v, [2, 3]),
                  n_units=3,
                  var_list=v)

    assert isinstance(x(), tf.SparseTensor)
    assert len(x.trainable_variables) == 1

    y = tx.Linear(x, n_units=3)

    # a graph without inputs needs to have its missing inputs declared,
    # otherwise it will try to add the detected input nodes to the graph inputs
    graph = tx.Graph.build(inputs=None, outputs=y)
    fn = graph.as_function()

    @tf.function
    def loss(labels):
        return tf.reduce_mean(tf.pow(labels - fn(), 2))

    with tf.GradientTape() as tape:
        loss_val = loss(target)

    assert tx.same_shape(tape.gradient(loss_val, v), v.value())
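
# Illustration only (not part of the original test suite): the same idea in
# plain TensorFlow, without tensorx. The values of a tf.SparseTensor come from
# a variable, and the gradient of a dense loss with respect to that variable
# has the variable's shape.
def sparse_var_gradient_sketch():
    v = tf.Variable([0.5, 0.5])
    with tf.GradientTape() as tape:
        sp = tf.SparseTensor(indices=[[0, 0], [1, 1]], values=v, dense_shape=[2, 3])
        dense = tf.sparse.to_dense(sp)
        loss = tf.reduce_mean(tf.pow(1.0 - dense, 2))
    grad = tape.gradient(loss, v)
    assert grad.shape == v.shape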

def test_to_sparse_gradient():
    target = tf.constant([[1., 0.], [1., 0.]])

    x = tx.Constant(tf.ones([1, 4], dtype=tf.float32), n_units=4)
    h = tx.Linear(x, n_units=2)
    y = tx.ToSparse(h)
    y = tx.ToDense(y)

    @tf.function
    def loss_fn(labels, prediction):
        return tf.reduce_mean(tf.pow(labels - prediction, 2))

    with tf.GradientTape() as tape:
        pred = y()
        loss = loss_fn(target, pred)

    gradients = tape.gradient(loss, h.trainable_variables)
    assert len(gradients) == 2
    assert tx.same_shape(gradients[0], h.weights)
    assert tx.same_shape(gradients[1], h.bias)
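
# Illustration only (not part of the original test suite): a plain-TensorFlow
# sketch of the same round trip. Converting a dense tensor to sparse and back
# does not block the gradient, so the gradients with respect to the layer
# variables keep their usual shapes (tf.keras.layers.Dense stands in for
# tx.Linear here).
def sparse_roundtrip_gradient_sketch():
    dense_layer = tf.keras.layers.Dense(2)
    x = tf.ones([1, 4])
    with tf.GradientTape() as tape:
        h = dense_layer(x)
        y = tf.sparse.to_dense(tf.sparse.from_dense(h))
        loss = tf.reduce_mean(tf.pow(1.0 - y, 2))
    grads = tape.gradient(loss, dense_layer.trainable_variables)
    assert grads[0].shape == dense_layer.kernel.shape
    assert grads[1].shape == dense_layer.bias.shape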

def test_rnn_layer_config():
    x1 = tx.Input(init_value=tf.ones([2, 2]), n_units=2)
    x_config = x1.config
    x2 = x_config()
    assert tx.tensor_equal(x1(), x2())

    rnn_cell = tx.RNNCell(input_layer=x1, n_units=3)
    rnn_proto = rnn_cell.config
    rnn_cell2 = rnn_proto(x1)

    assert tx.same_shape(rnn_cell(), rnn_cell2())
    assert not tx.tensor_equal(rnn_cell(), rnn_cell2())
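
# Illustration only (not part of the original test suite): the same pattern
# with plain Keras instead of tx layer configs. Rebuilding a layer from its
# config produces a layer with the same shape but a fresh random
# initialization, so outputs match in shape but (almost surely) not in value.
def keras_config_sketch():
    x = tf.ones([2, 2])
    d1 = tf.keras.layers.Dense(3)
    d2 = tf.keras.layers.Dense.from_config(d1.get_config())
    y1, y2 = d1(x), d2(x)
    assert y1.shape == y2.shape
    assert not np.allclose(y1.numpy(), y2.numpy())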

def test_multihead_attention():
    """ TODO check causality
    """
    n_features = 3
    embed_size = 128
    seq_size = 3
    batch_size = 2
    n_heads = 8

    inputs = tx.Constant(np.random.random([batch_size, seq_size]),
                         n_units=seq_size,
                         dtype=tf.int32)
    emb = tx.Lookup(inputs,
                    seq_size=seq_size,
                    embedding_shape=[n_features, embed_size])

    attention = tx.MHAttention(query=emb,
                               key=emb,
                               value=emb,
                               n_units=embed_size,
                               n_heads=n_heads,
                               causality=False,
                               attention_dropout=0.1,
                               regularized=False)

    assert len(attention.inputs) == 3
    # one projection "kernel" each for the query, key, and value inputs
    assert len(attention.variables) == 3

    attention_reg = attention.reuse_with(emb, emb, emb, regularized=True)
    attention_2 = attention.reuse_with(emb, emb, emb, regularized=False)
    attention_causal = attention.reuse_with(emb, emb, emb, causality=True)
    attention_causal()

    result = attention()
    result_reg = attention_reg()
    result2 = attention_2()

    assert tx.same_shape(result, result_reg)
    assert tx.tensor_equal(result, result2)

    vars1 = map(lambda v: v.ref(), attention.variables)
    vars2 = map(lambda v: v.ref(), attention_2.variables)
    assert set(vars1) == set(vars2)
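
# Illustration only (not part of the original test suite): single-head scaled
# dot-product attention in plain TensorFlow, showing the core computation that
# multi-head attention repeats per head. This is the standard formulation, not
# the tx.MHAttention implementation.
def scaled_dot_product_attention(q, k, v):
    # q, k, v: [batch, seq, dim]
    scores = tf.matmul(q, k, transpose_b=True)                        # [batch, seq, seq]
    scores = scores / tf.math.sqrt(tf.cast(tf.shape(k)[-1], q.dtype))
    weights = tf.nn.softmax(scores, axis=-1)                          # attention weights
    return tf.matmul(weights, v)                                      # [batch, seq, dim]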

def test_variable_layer_reuse():
    input_layer = tx.Input([[1]], n_units=1, dtype=tf.float32)
    input_layer2 = tx.Input([[1], [2]], n_units=1, dtype=tf.float32)
    var1 = tx.VariableLayer(shape=[2, 1])

    var2 = var1.reuse_with(input_layer)
    var3 = var1.reuse_with(input_layer2)

    v0 = var1()
    v1 = var2()
    assert not tx.tensor_equal(v0, v1)

    # the shared inner variable changed when we evaluated v1
    v2 = var1()
    assert tx.tensor_equal(v2, v1)

    v3 = var3()
    assert not tx.tensor_equal(v2, v3)
    v4 = var1()
    assert tx.tensor_equal(v3, v4)

    # the variable batch dimension is dynamic, so the shapes will differ
    assert not tx.same_shape(v4, v1)
    assert tx.same_shape(v2, v1)
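
# Illustration only (not part of the original test suite): the behaviour
# asserted above boils down to shared-variable semantics. Two views over the
# same tf.Variable observe each other's updates, which is why re-evaluating
# var1 after var2 or var3 returns the value they wrote.
def shared_variable_sketch():
    shared = tf.Variable(tf.zeros([2, 1]))
    before = shared.read_value()
    shared.assign(tf.ones([2, 1]))   # analogous to evaluating a reused layer
    after = shared.read_value()      # analogous to re-evaluating the original layer
    assert not np.array_equal(before.numpy(), after.numpy())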

def test_residual():
    x1 = tx.Input([[1., 1., 1., 1.]], 4)
    x2 = tx.Input([[1., 1., 1., 1.]], 4)

    h1 = tx.FC(x1, 4, activation=tf.sigmoid)
    h2 = tx.FC(x1, 2, activation=tf.sigmoid)
    h3 = tx.FC(x2, 2, activation=tf.sigmoid)

    residual = tx.Residual(x1, h1)
    residual2 = tx.Residual(x1, h2)

    with pytest.raises(ValueError):
        tx.Residual(x1, h3)
        pytest.fail("ValueError Expected: invalid module x1 not connected to h3")

    assert tx.same_shape(h1(), residual())
    assert not hasattr(residual, "projection")
    assert hasattr(residual2, "projection")
    assert len(residual.trainable_variables) == 0
    assert len(residual2.trainable_variables) == 1
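
# Illustration only (not part of the original test suite): a plain-TensorFlow
# sketch of why residual2 above needs a projection. When the module output has
# a different number of units than the input, the input is linearly projected
# before the addition; a bias-free projection accounts for exactly one
# trainable variable, as the last assertion expects.
def residual_sketch(x, module_out):
    if x.shape[-1] != module_out.shape[-1]:
        projection = tf.keras.layers.Dense(module_out.shape[-1], use_bias=False)
        x = projection(x)
    return x + module_out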

def test_shape_equal():
    t1 = tf.random.uniform([2, 2], dtype=tf.float32)
    t2 = tf.random.uniform([2, 3], dtype=tf.float32)

    assert tx.same_shape(t1, t1)
    assert not tx.same_shape(t1, t2)
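
# Illustration only (not part of the original test suite): a minimal sketch of
# what a static shape-equality check like tx.same_shape can do; the actual
# tensorx implementation may handle dynamic or unknown dimensions differently.
def same_shape_sketch(t1, t2):
    return t1.shape.as_list() == t2.shape.as_list()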