def test_variable_checkpoint(tmp_path):
    """Checkpoint a ``Linear`` layer and inspect the variables written to disk."""
    ones = tx.Constant(tf.ones([2, 4]))
    l1 = tx.Linear(ones, 3, add_bias=True, name="l1")
    l2 = tx.Linear(ones, 3, add_bias=False, name="l1")

    ckpt = tf.train.Checkpoint(l1=l1)
    ckpt_manager = tf.train.CheckpointManager(ckpt,
                                              tmp_path / 'ckpts',
                                              max_to_keep=1)
    ckpt_manager.save(1)

    # overwrite l1's weights with l2's after saving, then restore
    l1.weights.assign(l2.weights.value())

    restore_status = ckpt.restore(ckpt_manager.latest_checkpoint)
    restore_status.assert_existing_objects_matched()

    stored = tf.train.list_variables(ckpt_manager.latest_checkpoint)
    assert len(stored) == 4

    # each entry is indexed at position 0 to get the stored key
    first, second, third, fourth = (entry[0] for entry in stored)
    assert first == '_CHECKPOINTABLE_OBJECT_GRAPH'
    assert "l1/bias" in second
    assert "l1/weights" in third
    assert "save_counter" in fourth
def _build_graph(self, layer, previous_state):
    """Create (or reuse) the input and recurrent kernels and return their sum.

    Args:
        layer: input layer; its ``tensor`` is used to read the batch size
            when no previous state is supplied.
        previous_state: when ``None``, a zero-valued state layer of width
            ``self.n_units`` is created and stored in ``self.previous_state``.

    Returns:
        ``self.kernel.tensor + self.recurrent_kernel.tensor``.
    """
    with layer_scope(self):
        if previous_state is None:
            # batch size is read dynamically from the input tensor
            input_batch = tf.shape(layer.tensor)[0]
            zero_state = tf.zeros([input_batch, self.n_units])
            self.previous_state = tx.TensorLayer(zero_state, self.n_units)
        # NOTE(review): when previous_state is not None it is not assigned
        # here; presumably self.previous_state is set by the caller — confirm.

        if self.share_state_with is None:
            # fresh parameters: the activation wraps only the input kernel
            kernel_linear = tx.Linear(layer, self.n_units, bias=True, weight_init=self.init, name="linear_kernel")
            kernel_act = tx.Activation(kernel_linear, self.activation)
            self.kernel = tx.Compose([kernel_linear, kernel_act])
            self.recurrent_kernel = tx.Linear(
                self.previous_state, self.n_units, bias=False, weight_init=self.recurrent_init, name="recurrent_kernel")
        else:
            # reuse the kernels of the layer we share state with
            self.kernel = self.share_state_with.kernel.reuse_with(layer)
            self.recurrent_kernel = self.share_state_with.recurrent_kernel.reuse_with(
                self.previous_state)

        # TODO this might be wrong, I might need to couple the activation: act(kernel + recurrent + bias)
        # TODO it is wrong https://github.com/tensorflow/tensorflow/blob/r1.8/tensorflow/python/ops/rnn_cell_impl.py
        # """Most basic RNN: output = new_state = act(W * input + U * state + B)."""
        # As written, the result is act(W * input + B) + U * state: the
        # recurrent term is added after the activation.
        return self.kernel.tensor + self.recurrent_kernel.tensor
def test_graph_build():
    """Exercise ``Graph.build`` with implicit inputs, explicit inputs and an invalid wiring."""
    x = tx.Input([[1]])

    # a graph whose only output is the input itself
    single = Graph.build(None, x)
    assert len(single.in_nodes) == len(single.out_nodes)
    assert len(single.in_nodes) == 1

    l1 = tx.Linear(x, n_units=2)
    l2 = tx.Linear(x, n_units=2)
    l3 = tx.Linear(x, n_units=2)

    g1 = Graph.build(None, l1)
    assert len(g1.in_nodes) == len(g1.out_nodes)
    assert set(g1.in_nodes).isdisjoint(g1.out_nodes)
    assert l1 in g1.out_nodes
    assert x in g1.in_nodes

    # explicit and inferred inputs must overlap
    g2 = Graph.build(x, l1)
    assert not set(g1.in_nodes).isdisjoint(g2.in_nodes)
    assert not set(g1.out_nodes).isdisjoint(g2.out_nodes)

    with pytest.raises(ValueError):
        Graph.build([l2, l3], l1)
        pytest.fail("ValueError Expected: invalid graph")

    fan_out = Graph.build(x, [l2, l3])
    assert len(fan_out.edges_out[x]) == 2
    assert l2 in fan_out.edges_out[x]
    assert l3 in fan_out.edges_out[x]
    assert x == fan_out.edges_in[l2][0]
def test_linear_rank3():
    """Apply ``Linear`` to a rank-3 input and compare with the flattened equivalent."""
    val = tf.constant([[[1], [1]], [[2], [2]]])
    x1 = tx.Input(val, dtype=tf.float32)
    x2 = tx.Transpose(x1)

    assert val.shape[1:] == x1.shape[1:]

    flat_input = tx.Reshape(x1, [-1, 1])

    linear1 = tx.Linear(x1, n_units=2)
    linear2 = tx.Linear(x2,
                        weights_shape=[2, 1],
                        weights=linear1.weights,
                        transpose_weights=True)

    # reusing with transposed weights would change the layer
    # definition (n_units etc), so it must be rejected
    with pytest.raises(ValueError):
        linear1.reuse_with(x2, transpose_weights=True)
        pytest.fail(
            "can't reuse with transpose weights while changing the layer definition"
        )

    flat_linear = linear1.reuse_with(flat_input, shape=(4, 2))
    target_shape = x1().shape[:-1] + [2]
    restored = tx.Reshape(flat_linear, target_shape)

    assert tx.tensor_equal(linear1(), restored())
    assert tx.tensor_equal(tf.shape(linear2()), [1, 2, 1])
def test_linear():
    """Check ``Linear`` variable shapes, repeatable evaluation and both ways of sharing state."""
    base = tx.Constant(tf.ones([2, 4]), dtype=tf.float64)
    doubled = base * 2

    layer = tx.Linear(base, n_units=8, dtype=tf.float64)
    weights, bias = layer.weights, layer.bias
    assert weights.shape == [4, 8]
    assert bias.shape == [8]
    assert len(layer.trainable_variables) == 2

    # two evaluations of the same layer agree
    first_eval = layer()
    second_eval = layer()
    assert tx.tensor_equal(first_eval, second_eval)

    # sharing state through the constructor
    shared = tx.Linear(layer.inputs[0],
                       8,
                       share_state_with=layer,
                       dtype=tf.float64)
    third_eval = shared()
    assert tx.tensor_equal(first_eval, third_eval)

    # sharing state through reuse_with
    layer = tx.Linear(base, 8, dtype=tf.float64)
    reused = layer.reuse_with(doubled)
    assert layer.weights is reused.weights
    assert layer.bias is reused.bias
    assert tx.tensor_equal(layer() * 2, reused())
def test_graph_repeated():
    """Build a graph from ``l1`` to ``l3`` with ``add_missing_inputs`` and check its input nodes."""
    x = tx.Input([[1]])
    l1 = tx.Linear(x, 2, name="l1")
    l2 = tx.Linear(x, 2, name="l2")

    make_sum = tx.layer(n_units=2, name="l3")(lambda a, b: tf.add(a, b))
    l3 = make_sum(l1, l2)

    g = Graph.build(l1, l3, add_missing_inputs=True)

    expected_inputs = {x, l1}
    assert expected_inputs == set(g.in_nodes)
def test_shared_state():
    """Layers created with ``share_state_with`` expose the very same weight and bias objects."""
    ones = tf.ones([2, 4])
    l1 = tx.Linear(ones, 8)
    l2 = tx.Linear(ones, 8, share_state_with=l1)

    # same sharing, but through a layer configuration
    proto = tx.Linear.config(n_units=8, share_state_with=l1)
    l3 = proto(ones)

    for sharing in (l2, l3):
        assert l1.weights is sharing.weights
        assert l1.bias is sharing.bias
def test_graph_draw(tmpdir):
    """Draw a two-input, two-output graph to a PDF and check the file was created."""
    x = tx.Input([[1]])
    x2 = tx.Input([[1]])
    l1 = tx.Linear(x, 2, name="l1")
    l2 = tx.Linear(x, 2, name="l2")
    make_sum = tx.layer(n_units=2, name="l3")(lambda a, b: tf.add(a, b))
    l3 = make_sum(l1, l2)
    l4 = l2.reuse_with(x2)

    graph = Graph.build(inputs=[x, x2], outputs=[l3, l4])

    pdf_path = str(tmpdir.join("test.pdf"))
    graph.draw(path=pdf_path)
    assert os.path.exists(pdf_path)
def test_gate():
    """A ``Gate`` reused with an identity-activated copy of its input gives the same result."""
    source = tx.Input(init_value=tf.ones([2, 3]))
    linear = tx.Linear(source, n_units=4)
    passthrough = tx.Activation(linear, fn=tx.identity)
    gate_w = tx.Linear(linear, n_units=4, add_bias=True)

    gate1 = tx.Gate(linear, gate_w)
    gate2 = gate1.reuse_with(passthrough)

    assert tx.shape_equal(gate1.shape, gate2.shape)

    out1 = gate1()
    out2 = gate2()
    assert tx.tensor_equal(out1, out2)
def test_gradient_sparse_var():
    """Gradient of a loss w.r.t. a variable that provides the values of a sparse tensor.

    Reference: https://www.tensorflow.org/beta/guide/effective_tf2
    """
    target = tf.constant([[1., 0., 0.], [1., 0., 0.]])
    v = tf.Variable([0.5, 0.5])

    sparse_source = tx.Lambda([],
                              fn=lambda _: tf.SparseTensor([[0, 0], [1, 1]], v, [2, 3]),
                              n_units=3,
                              var_list=v)
    assert isinstance(sparse_source(), tf.SparseTensor)
    assert len(sparse_source.trainable_variables) == 1

    y = tx.Linear(sparse_source, n_units=3)

    # a graph without inputs needs to have missing inputs declared
    # otherwise it will try to add the inputs detected to inputs
    graph = tx.Graph.build(inputs=None, outputs=y)
    fn = graph.as_function()

    @tf.function
    def loss(labels):
        return tf.reduce_mean(tf.pow(labels - fn(), 2))

    with tf.GradientTape() as tape:
        loss_val = loss(target)

    grad = tape.gradient(loss_val, v)
    assert tx.same_shape(grad, v.value())
def test_layer_graph():
    """Graph construction rejects unconnected or missing inputs; a valid graph matches a manual matmul."""
    data = [[1., 2.]]
    in1 = tx.Input(n_units=2, name="in1", constant=False)
    in2 = tx.Input(n_units=2, name="in2", constant=False)
    linear = tx.Linear(in1, 1, add_bias=False)

    graph = tx.Graph.build(inputs=in1, outputs=linear)
    assert in1 in graph.in_nodes

    # in2 is not connected to the output
    with pytest.raises(ValueError):
        tx.Graph.build(inputs=[in1, in2], outputs=linear)
        pytest.fail(
            "Expected ValueError: some inputs are not connected to anything")

    # in1 is required but not listed
    with pytest.raises(ValueError):
        tx.Graph.build(inputs=[in2], outputs=linear)
        pytest.fail(
            "Expected ValueError: inputs specified but dependencies are missing"
        )

    expected = tf.matmul(data, linear.weights)

    in1.value = data
    from_layer = linear()
    from_graph = graph(data)

    assert tx.tensor_equal(from_graph[0], expected)
    assert tx.tensor_equal(from_layer, expected)
def test_add_optimizer():
    """``set_optimizer`` returns the model optimizer, which is hashable and follows the ``lr`` param."""
    target = tx.Constant([[1.]])
    inputs = tx.Input(n_units=2, name="inputs")
    output = tx.Linear(inputs, n_units=1, name="y")
    loss = tx.Lambda(target,
                     output,
                     fn=tf.nn.softmax_cross_entropy_with_logits,
                     name="xent")

    model = tx.Model(run_outputs=output,
                     train_inputs=[inputs, target],
                     train_loss=loss)

    lr = tx.Param(init_value=0.2, name="lr")
    returned_opt = model.set_optimizer(tf.optimizers.SGD, lr=lr)
    stored_opt: tf.optimizers.Optimizer = model.optimizer

    assert returned_opt == stored_opt

    # optimizer is hashable: both names address the same dict entry,
    # so the later value wins
    by_optimizer = {returned_opt: 0, stored_opt: 1}
    assert returned_opt in by_optimizer
    assert by_optimizer[returned_opt] == 1

    lr.value = 0.3
    assert np.float32(0.3) == returned_opt.lr.numpy()
def test_model_run():
    """``Model.run`` gives the same outputs for dict feeds, list feeds and the compiled graph."""
    sample = tf.constant([[1., 1.]])

    x = tx.Input(n_units=2, name="x", constant=False)
    labels = tx.Input(n_units=2, name="y_", constant=False)
    y = tx.Linear(x, 2, name="y")
    out1 = tx.Activation(y, tf.nn.softmax)
    out2 = tx.Activation(y, tf.nn.softmax)

    @tx.layer(n_units=2, name="loss")
    def loss(pred, labs):
        return tf.losses.categorical_crossentropy(labs, pred)

    model = tx.Model(run_inputs=x,
                     run_outputs=[out1, out2],
                     train_inputs=[x, labels],
                     train_outputs=out1,
                     train_loss=loss(out1, labels))
    model.set_optimizer(tf.optimizers.SGD, lr=0.5)

    from_dict = model.run({x: sample})
    from_list = model.run([sample])
    assert tx.tensor_equal(from_dict[0], from_list[0])
    assert tx.tensor_equal(from_dict[1], from_list[1])

    from_compiled = model.run({x: sample}, compiled_graph=True)
    assert tx.tensor_equal(from_compiled[0], from_list[0])
    assert tx.tensor_equal(from_compiled[1], from_list[1])
def test_loss_model_dependencies():
    """The train graph's dependency iterator lists both inputs first and six layers in total."""
    inputs = tx.Input(n_units=2, name="x", constant=False)
    labels = tx.Input(n_units=2, name="y_", constant=False)
    y = tx.Linear(inputs, 2, name="y")
    out1 = tx.Activation(y, tf.nn.softmax, name="out1")
    out2 = tx.Activation(y, tf.nn.softmax, name="out2")

    @tx.layer(n_units=2, name="loss")
    def loss(pred, labs):
        return tf.losses.categorical_crossentropy(labs, pred)

    logging.basicConfig(level=logging.DEBUG)

    model = tx.Model(run_inputs=inputs,
                     run_outputs=[out1, out2],
                     train_inputs=[inputs, labels],
                     train_outputs=[out2, out1],
                     train_loss=loss(out1, labels))

    learning_rate = tx.Param(0.5)
    optimizer = model.set_optimizer(tf.optimizers.SGD, lr=learning_rate)
    assert isinstance(optimizer, tf.optimizers.Optimizer)

    ordered_layers = list(model.train_graph.dependency_iter())
    assert ordered_layers[0] is inputs
    assert ordered_layers[1] is labels
    assert len(ordered_layers) == 6
def test_coupled_gate():
    """Build ``CoupledGate`` layers over dense and sparse features and evaluate them."""
    vocab_size = 4
    n_features = 3
    seq_size = 2

    token_ids = np.array([[2, 0], [1, 2]])
    inputs = tx.Input(init_value=token_ids,
                      n_units=seq_size,
                      dtype=tf.int32,
                      constant=True)

    embedding_shape = [vocab_size, n_features]
    features1 = tx.Lookup(inputs, seq_size,
                          embedding_shape=embedding_shape).as_concat()
    features2 = tx.Lookup(inputs, seq_size,
                          embedding_shape=embedding_shape).as_concat()
    gate_w = tx.Linear(features1, seq_size, add_bias=True)
    coupled_gate = tx.CoupledGate(features1, features2, gate_w)

    # the sparse view of features1 must densify back to features1
    sp_features1 = tx.ToSparse(features1)
    assert tx.tensor_equal(tf.sparse.to_dense(sp_features1()), features1())

    sp_gate = tx.CoupledGate(sp_features1, features2, gate_w)
    print(sp_gate())
    print(sp_gate.shape)

    # evaluated only to check that the dense gate runs
    r1 = coupled_gate()
def test_set_optimizer():
    """Configure an optimizer with a ``Param`` learning rate, train two steps and compare run paths."""
    x = tx.Input(n_units=2, name="x", constant=False)
    labels = tx.Input(n_units=2, name="labels", constant=False)
    y = tx.Linear(x, 2, name="y")
    out1 = tx.Activation(y, tf.nn.softmax)
    out2 = tx.Activation(y, tf.nn.softmax)

    @tx.layer(n_units=2, name="loss")
    def loss(pred, labs):
        return tf.losses.categorical_crossentropy(labs, pred)

    model = tx.Model(run_inputs=x,
                     run_outputs=[out1, out2],
                     train_inputs=[x, labels],
                     train_outputs=[out2, out1],
                     train_loss=loss(out1, labels))

    lr = tx.Param(0.5)
    opt = model.set_optimizer(tf.optimizers.SGD,
                              learning_rate=lr,
                              clipnorm=0.1)
    assert isinstance(opt, tf.optimizers.Optimizer)
    assert model.optimizer.get_config()["learning_rate"] == 0.5

    data1 = [[1., 1.], [1., 1.]]
    data2 = tf.constant([[0., 1.], [0., 1.]])

    # a mixed feed is split into layer data and optimizer params
    params = model.optimizer_params[model.optimizer]
    mixed_feed = {x: data1, "learning_rate": 0.2}
    data_dict, params_dict = tx.Model.parse_input(mixed_feed,
                                                  model.run_graph.in_nodes,
                                                  params)
    assert len(data_dict) == 1
    assert len(params_dict) == 1
    assert model.optimizer_params[opt]["learning_rate"] is lr

    # the last element of the second step's result is lower than the first's
    result1 = model.train_step({x: data1, labels: data2})
    result2 = model.train_step([data1, data2])
    assert len(result1) == 3
    assert len(result2) == 3
    assert tf.reduce_all(tf.less(result2[-1], result1[-1]))

    # every way of running the model must agree
    result1 = model.run({x: np.array(data1, dtype=np.float32)})
    result2 = model.run([data1])
    result3 = model.run(np.array(data1, np.float32))

    x.value = data1
    o2 = out2()
    o1 = out1()
    result4 = (o2, o1)

    for i in range(2):
        reference = result1[i]
        assert tx.tensor_equal(reference, result2[i])
        assert tx.tensor_equal(reference, result3[i])
        assert tx.tensor_equal(reference, result4[i])
def test_attention():
    """Compare single-head ``tx.MHAttention`` with identity projections against Keras ``Attention``."""
    n_features = 3
    embed_size = 8
    seq_size = 3
    batch_size = 2

    inputs = tx.Constant(np.random.random([batch_size, seq_size]),
                         n_units=seq_size,
                         dtype=tf.int32)
    emb = tx.Lookup(inputs,
                    seq_size=seq_size,
                    embedding_shape=[n_features, embed_size])
    seq = emb()

    # keras attention doesn't have multiple heads
    keras_attention = Attention(use_scale=False)
    res = keras_attention([seq, seq, seq])

    attention2 = tx.MHAttention(emb, emb, emb, n_units=embed_size, n_heads=1)
    assert len(attention2.variables) == 3

    # replace the query/key/value projections with identity matrices
    attention2.wq = tx.Linear(emb,
                              n_units=None,
                              weights=tf.linalg.eye(embed_size, embed_size),
                              add_bias=False)
    attention2.wk = tx.Linear(emb,
                              n_units=None,
                              weights=tf.linalg.eye(embed_size, embed_size),
                              add_bias=False)
    attention2.wv = tx.Linear(emb,
                              n_units=None,
                              weights=tf.linalg.eye(embed_size, embed_size),
                              add_bias=False)
    assert tx.tensor_equal(attention2.wq(seq), seq)

    res2 = attention2()

    graph = tx.Graph.build(inputs=emb, outputs=attention2)
    graph_fn = graph.as_function(ord_inputs=emb, ord_outputs=attention2)
    res3 = graph_fn(seq)

    assert tx.tensor_equal(res, res2)
    assert tx.tensor_equal(res, res3)
def test_to_sparse():
    """``ToSparse`` keeps the shape of the layers feeding it."""
    source = tx.Input(init_value=tf.ones([2, 100]))
    linear = tx.Linear(source, n_units=100)
    relu = tx.Activation(linear, tx.relu)
    sparse = tx.ToSparse(relu)

    for upstream in (linear, relu):
        assert tx.shape_equal(sparse.shape, upstream.shape)
def test_multi_output_graph():
    """A graph returns one result per output; a shared output agrees across graphs."""
    data1 = [[1., 1.]]
    data2 = [[2., 1.]]

    in1 = tx.Input(data1, 2, name="in1", constant=False)
    in2 = tx.Input(data2, 2, name="in2")

    linear1 = tx.Linear(in1, 1)
    linear2 = tx.Linear(tx.Add(in1, in2), 1)

    both = tx.Graph.build(inputs=None, outputs=[linear1, linear2])
    both_results = both()
    assert len(both_results) == 2

    only_second = tx.Graph.build(inputs=None, outputs=[linear2])
    second_results = only_second()
    assert len(second_results) == 1

    assert tx.tensor_equal(second_results[0], both_results[-1])
def test_graph_no_inputs():
    """Building a graph with ``inputs=None`` still collects the upstream input node."""
    in1 = tx.Input(n_units=2, constant=False)
    lin1 = tx.Linear(in1, n_units=4)

    graph = tx.Graph.build(inputs=None, outputs=lin1)
    assert len(graph.nodes) == 2
    for node in (in1, lin1):
        assert node in graph.nodes

    # only checks that building with add_missing_inputs does not raise
    graph = tx.Graph.build(inputs=None,
                           outputs=lin1,
                           add_missing_inputs=True)
def test_build_graph():
    """Build a graph containing a ``Module`` and compare four ways of evaluating it.

    The second half times each evaluation path with ``timeit`` and asserts
    an ordering between the measured times.
    """
    x1 = tx.Input(n_units=1000, constant=False, dtype=tf.float32)
    x2 = tx.Input(init_value=tf.ones([1, 3]), dtype=tf.float32, constant=True)
    y10 = tx.Linear(x1, n_units=3)
    y11 = tx.Activation(y10)
    y1 = tx.Module(x1, y11)
    y2 = tx.Add(y1, x2)
    output = y2

    graph = Graph.build(inputs=None, outputs=[y1, y2])
    # module condenses 2 nodes so it's 4 and not 6
    assert len(graph.nodes) == 4

    # path 1: a tf.function that sets the input value, then calls the layer
    @tf.function
    def simple_graph(in0):
        x1.value = in0
        return y2()

    # path 2: the Graph object itself wrapped in tf.function
    simple_graph_2 = Graph.build(inputs=[x1, x2], outputs=y2)
    simple_graph_2 = tf.function(simple_graph_2)
    g = Graph.build(inputs=[x1, x2], outputs=y2)
    # path 3: the function obtained from the output layer
    y2fn = y2.as_function()
    data = tf.ones([256, 1000])
    x1.value = data

    # path 4: the function obtained from the graph, with x1 as its input
    compiled_fn = g.as_function(ord_inputs=x1, ord_outputs=output)

    assert tx.tensor_equal(compiled_fn(data), y2fn())
    assert tx.tensor_equal(compiled_fn(data), simple_graph_2()[0])

    from timeit import timeit

    def update_run():
        # sets a fresh random input value, then evaluates path 3
        x1.value = tf.random.uniform([256, 1000])
        return y2fn()

    n = 1000
    t_update_run = timeit(update_run, number=n)
    t_generated = timeit(lambda: compiled_fn(tf.random.uniform([256, 1000])), number=n)
    t_compile_value_set = timeit(
        lambda: simple_graph(tf.random.uniform([256, 1000])), number=n)
    t_graph_call_tf = timeit(
        lambda: simple_graph_2(tf.random.uniform([256, 1000])), number=n)

    # NOTE(review): these assertions compare wall-clock measurements, so the
    # outcome presumably depends on the machine and its load — confirm they
    # are stable enough for CI.
    assert t_generated < t_update_run
    assert t_generated < t_compile_value_set
    assert t_generated < t_graph_call_tf
    assert t_update_run > t_compile_value_set

    # two different random inputs must produce different outputs
    o1 = compiled_fn(tf.random.uniform([256, 1000]))
    o2 = compiled_fn(tf.random.uniform([256, 1000]))
    assert not tx.tensor_equal(o1, o2)
def _build_graph(self, layer, previous_state):
    """Create the gate and candidate linear layers for a gated recurrent cell.

    NOTE(review): this block looks unfinished — it reads several names that
    are never assigned here (see the inline notes) and the value returned
    does not use the gate it builds.

    Args:
        layer: input layer; its ``tensor`` is used to read the batch size
            when no previous state is supplied.
        previous_state: when ``None``, a zero-valued state layer of width
            ``self.n_units`` is created and stored in ``self.previous_state``.

    Returns:
        ``self.kernel.tensor + self.recurrent_kernel.tensor``.
    """
    with layer_scope(self):
        if previous_state is None:
            # batch size is read dynamically from the input tensor
            input_batch = tf.shape(layer.tensor)[0]
            zero_state = tf.zeros([input_batch, self.n_units])
            self.previous_state = tx.TensorLayer(zero_state, self.n_units)

        if self.share_state_with is None:
            # determines the weight of the previous state
            # we could add the bias at the end but this way we just define a single bias for the r unit
            self.r_current_w = tx.Linear(layer, self.n_units, bias=True, weight_init=self.init, name="r_current_w")
            # NOTE(review): name repeats "r_current_w"; presumably meant
            # "r_recurrent_w" — confirm.
            self.r_recurrent_w = tx.Linear(self.previous_state, self.n_units, bias=False, weight_init=self.recurrent_init, name="r_current_w")

            self.u_current_w = tx.Linear(layer, self.n_units, bias=True, weight_init=self.init, name="u_current_w")
            # NOTE(review): name repeats "u_current_w"; presumably meant
            # "u_recurrent_w" — confirm.
            self.u_recurrent_w = tx.Linear(self.previous_state, self.n_units, bias=False, weight_init=self.recurrent_init, name="u_current_w")

            self.current_w = tx.Linear(layer, self.n_units, bias=True, weight_init=self.init, name="current_w")
            self.recurrent_w = tx.Linear(self.previous_state, self.n_units, bias=False, weight_init=self.recurrent_init, name="recurrent_w")

            # kernel_gate = tx.Activation()
            # NOTE(review): `kernel_linear` is not assigned anywhere in this
            # block, and `self.recurrent_kernel` (read by the return) is not
            # set on this branch — verify the intended layers.
            kernel_act = tx.Activation(kernel_linear, self.activation)
            self.kernel = tx.Compose(kernel_linear, kernel_act)
        else:
            # reuse the kernels of the layer we share state with
            self.kernel = self.share_state_with.kernel.reuse_with(layer)
            self.recurrent_kernel = self.share_state_with.recurrent_kernel.reuse_with(
                self.previous_state)

        # NOTE(review): `r_current_w` / `r_recurrent_w` are read as local
        # names, but only the `self.` attributes are assigned above — confirm.
        r_state = tx.Add(r_current_w, r_recurrent_w)
        r_state = tx.Bias(r_state)
        # sigmoid gate built here is not used by the return below
        r_gate = tx.Activation(r_state, fn=tx.sigmoid, name="r_gate")

        # """Gated recurrent unit (GRU) with nunits cells."""
        return self.kernel.tensor + self.recurrent_kernel.tensor
def test_override_out_nodes():
    """Only the layers passed as ``outputs`` are reported as graph output nodes."""
    x = tx.Input(n_units=2, name="x", constant=False)
    y = tx.Linear(x, 2, name="y")
    out1 = tx.Activation(y, tf.nn.softmax, name="out1")
    out2 = tx.Activation(out1, tf.nn.softmax, name="out2")

    both = Graph.build(inputs=x, outputs=[out1, out2])
    assert out1 in both.out_nodes
    assert out2 in both.out_nodes

    # out2 is downstream of out1 and must not appear when it is not requested
    first_only = Graph.build(inputs=x, outputs=out1)
    assert out1 in first_only.out_nodes
    assert out2 not in first_only.out_nodes
def test_linear_graph_module_integration(tmp_path):
    """Save and reload a ``Module`` and a ``Linear`` layer with ``tf.saved_model``."""
    save_path = str(tmp_path.joinpath("linear"))

    source = tx.Input(init_value=tf.ones([2, 2], dtype=tf.float32))
    hidden = tx.Linear(source, n_units=source.n_units)
    linear = tx.Linear(hidden, n_units=4)

    graph = tx.Graph.build(inputs=None, outputs=linear)
    module = tx.Module(inputs=None, output=linear)

    # module and graph must discover the same single input
    assert len(module.inputs) == 1
    assert module.inputs == list(graph.in_nodes)
    assert len(graph.in_nodes) == 1

    tf.saved_model.save(module, save_path)
    module_loaded = tf.saved_model.load(save_path)
    assert tx.tensor_equal(module_loaded(), module())

    # the layer is saved to the same path as the module
    tf.saved_model.save(linear, save_path)
    linear_loaded = tf.saved_model.load(save_path)
    assert tx.tensor_equal(module_loaded(), linear_loaded())
def test_model_vars():
    """A model's trainable variables equal those of its output ``Linear`` layer."""
    target = tx.Constant([[1.]])
    inputs = tx.Input(n_units=2, name="inputs", constant=False)
    output = tx.Linear(inputs, n_units=1, name="y")
    xent = tx.Lambda(target,
                     output,
                     fn=tf.nn.softmax_cross_entropy_with_logits,
                     name="xent")

    model = tx.Model(run_outputs=output,
                     train_inputs=[inputs, target],
                     train_loss=xent)

    assert model.trainable_variables == output.trainable_variables
def test_dependency_iter():
    """ Dependency iterator after adding leaves to the graph

    Checks that ``Graph.dependency_iter`` yields layers in priority order
    and maps each layer to a comparable priority value.
    """
    x1 = tx.Input(n_units=2, name="x1", constant=False)
    x2 = tx.Input(n_units=2, name="x2", constant=False)

    y1 = tx.Linear(x2, 2, name="y1")
    y2 = tx.Linear(y1, 2, name="y2")
    y3 = tx.Linear(x1, 2, name="y3")

    graph = Graph.build(inputs=[x1, x2], outputs=[y2, y3])
    dep = graph.dependency_iter()
    dep_iter = list(dep)

    # The previous `assert sorted(dep.values())` only checked that the list
    # was non-empty (a non-empty list is truthy); verify the ordering itself.
    dep_priorities = list(dep.values())
    assert dep_priorities
    assert dep_priorities == sorted(dep_priorities)

    assert dep_iter[0] is x1
    assert dep_iter[1] is x2
    assert y1 in dep_iter[-2:]
    assert y2 in dep_iter[-2:]

    # ANOTHER GRAPH
    x1 = tx.Input(n_units=1, name="x1")
    x2 = tx.Input(n_units=1, name="x2")
    x3 = tx.Input(n_units=1, name="x3")

    h = tx.Add(x1, x2, name="h")
    y = tx.Add(x3, h, name="y")

    g = Graph.build(inputs=None, outputs=y)
    priorities = g.dependency_iter()

    assert priorities[y] == (2, 0)
    assert priorities[x1] == (0, 1)
    assert priorities[y] > priorities[h]
def test_dynamic_input_graph():
    """
    A function frozen from a layer with a dynamic (non-constant) input reads
    the Input layer's variable on every call, so changing ``x.value`` changes
    the function's result.
    """
    zeros = tf.zeros([2, 2])
    ones = tf.ones([2, 2])

    x = tx.Input(zeros, n_units=2, constant=False)
    y = tx.Linear(x, 2, add_bias=False)
    graph_function = y.as_function()

    before = graph_function()
    assert tx.tensor_equal(before, tf.zeros([2, 2]))

    x.value = ones
    after = graph_function()
    assert tx.tensor_equal(after, tf.matmul(tf.ones([2, 2]), y.weights))

    assert not tx.tensor_equal(before, after)
def test_model_var_inputs():
    """Run one train step on a model whose train graph includes an ``RNN`` over a ``Lookup``."""
    # wanted to test when our train graph has more inputs that do not need
    # to be fed (e.g. variable state)
    n_features = 5
    embed_size = 4
    hidden_dim = 3
    seq_size = 3
    out_size = 2
    batch_size = 2

    x = tx.Input(np.random.random([batch_size, seq_size]),
                 n_units=seq_size,
                 dtype=tf.int32)
    y = tx.Input(np.random.random([batch_size, out_size]),
                 n_units=out_size,
                 dtype=tf.float32)

    lookup = tx.Lookup(x,
                       seq_size=seq_size,
                       embedding_shape=[n_features, embed_size])
    # swap the first two axes of the lookup output
    seq = tx.Transpose(lookup, [1, 0, 2])

    cell_config = tx.RNNCell.config(n_units=hidden_dim)
    rnn1 = tx.RNN(seq, cell_config=cell_config)
    last_index = seq_size - 1
    y_ = tx.Linear(rnn1[last_index], n_units=out_size)

    model = tx.Model(run_inputs=x,
                     run_outputs=y_,
                     train_inputs=[x, y],
                     train_outputs=y_,
                     train_loss=tx.MSE(y_, y))
    model.set_optimizer(tf.optimizers.SGD, lr=0.5)

    token_batch = [[0, 1, 2], [2, 1, 0]]
    label_batch = [[0., 1.], [1., 0.]]

    model.train_step(input_feed={x: token_batch, y: label_batch})
def test_to_sparse_gradient():
    """Gradients reach the ``Linear`` variables through a ``ToSparse`` / ``ToDense`` round trip."""
    target = tf.constant([[1., 0.], [1., 0.]])

    x = tx.Constant(tf.ones([1, 4], dtype=tf.float32), n_units=4)
    h = tx.Linear(x, n_units=2)
    round_trip = tx.ToDense(tx.ToSparse(h))

    @tf.function
    def loss_fn(labels, prediction):
        return tf.reduce_mean(tf.pow(labels - prediction, 2))

    with tf.GradientTape() as tape:
        pred = round_trip()
        loss = loss_fn(target, pred)

    gradients = tape.gradient(loss, h.trainable_variables)
    assert len(gradients) == 2

    weights_grad, bias_grad = gradients[0], gradients[1]
    assert tx.same_shape(weights_grad, h.weights)
    assert tx.same_shape(bias_grad, h.bias)
def test_fully_connected():
    """``FC`` matches an explicit ``Linear`` + ``Activation`` pair sharing its variables."""
    x1 = tx.Input(init_value=[[1., 1., 1., 1.]],
                  n_units=4,
                  dtype=tf.float32,
                  constant=True)
    x2 = tx.Input(init_value=np.random.uniform(size=[2, 4]),
                  dtype=tf.float32,
                  n_units=4,
                  constant=True)

    y1 = tx.FC(x1, 4, add_bias=True, activation=tf.sigmoid)
    y2 = tx.Linear(x1,
                   4,
                   add_bias=True,
                   weights=y1.linear.weights,
                   bias=y1.linear.bias)
    a2 = tx.Activation(y2, fn=tf.sigmoid)

    w = y2.weights
    b = y2.bias
    assert y1.linear.weights is w
    assert y1.linear.bias is b

    # manual reference computation
    expected_linear = tf.matmul(x1(), w) + b
    expected_act = tf.sigmoid(expected_linear)

    assert tx.tensor_equal(y2(), expected_linear)
    assert tx.tensor_equal(y1(), expected_act)
    assert tx.tensor_equal(y1(), a2())
    assert tx.tensor_equal(a2(), expected_act)

    # reuse on a different input keeps the same variable objects
    y1 = y1.reuse_with(x2)
    y2 = y2.reuse_with(x2)

    assert y2.weights is w
    assert y2.bias is b
    assert y1.linear.weights is w
    assert y1.linear.bias is b