def s_QRNN(X, n, name='qrnn'):
    """QRNN-style layer: a dense projection over time-shifted copies of ``X``
    followed by a gated linear recurrence.

    The static shape of ``X`` is read on axes 0, 1 and -1 (called ``length``,
    ``bs`` and ``size`` below) and the padding spec has three entries, so
    ``X`` is expected to be a rank-3 tensor with known static dimensions.

    Args:
        X: input tensor (see above).
        n: number of chunks each of the z / f / o pre-activations is split
            into and summed over; also drives how many shifted copies of
            ``X`` are stacked.
        name: scope name passed to ``vscope``.

    Returns:
        ``o * c`` where ``c = linear_recurrence(f, (1 - f) * z, serial=True)``.
    """
    static_shape = X.get_shape()
    size = static_shape[-1].value
    length = static_shape[0].value
    bs = static_shape[1].value

    with vscope(name):
        # Each copy is X delayed by m steps along axis 0: pad m zeros in
        # front, then crop back to the original extent.
        # NOTE(review): range(1, n - 1) yields shifts 1..n-2, so together
        # with X itself only n-1 copies are stacked although the projection
        # below is sized for n -- confirm this is intended.
        delayed = [
            tf.slice(tf.pad(X, [[m, 0], [0, 0], [0, 0]]),
                     [0, 0, 0], [length, bs, size])
            for m in range(1, n - 1)
        ]
        X_stacked = tf.concat([X] + delayed, axis=-1)

        preact = fc_layer(X_stacked, 3 * n * size,
                          nonlin=tf.identity, name='qrnn_pre')
        last_axis = len(preact.shape) - 1

        def sum_chunks(block):
            # Split the feature axis into n equal chunks and add them up.
            return tf.add_n(tf.split(block, n, last_axis))

        z_pre, f_pre, o_pre = tf.split(preact, 3, last_axis)
        z = tf.tanh(sum_chunks(z_pre))
        f = tf.sigmoid(sum_chunks(f_pre))
        o = tf.sigmoid(sum_chunks(o_pre))

        impulse = (1 - f) * z
        c = linear_recurrence(f, impulse, serial=True)
        return o * c
def gilr_layer(X, hidden_size, nonlin=tf.nn.elu, name='gilr'):
    """Gated impulse linear recurrent layer.

        g_t = sigmoid(U x_t + b)
        h_t = g_t h_{t-1} + (1 - g_t) f(V x_t + c)

    A single dense projection of width ``2 * hidden_size`` is split in half
    along the last axis: the first half becomes the gate ``g`` (sigmoid),
    the second half the impulse ``f(.)`` (``nonlin``).

    Args:
        X: input tensor handed to ``fc_layer``.
        hidden_size: width of the gate and of the impulse.
        nonlin: activation ``f`` applied to the impulse half.
        name: scope name passed to ``vscope``.

    Returns:
        The result of ``linear_recurrence(gate, (1 - gate) * impulse)``.
    """
    # The unused ``n_dims = X.get_shape()[-1].value`` lookup was dropped: it
    # had no effect on the result and fails where shape entries are plain
    # ints without a ``.value`` attribute.
    with vscope(name):
        act = fc_layer(X, 2 * hidden_size, nonlin=tf.identity)
        last_axis = len(act.shape) - 1
        gate, impulse = tf.split(act, 2, last_axis)
        gate = tf.sigmoid(gate)
        impulse = nonlin(impulse)
        return linear_recurrence(gate, (1 - gate) * impulse)
def s_SRU(X, name='SRU'):
    """SRU-style layer: gated linear recurrence plus a gated skip from ``X``.

    One dense projection of width ``3 * size`` (``size`` is the static last
    dimension of ``X``) is split along the last axis into a candidate, a
    forget pre-activation and a reset pre-activation.

    Args:
        X: input tensor; its last static dimension must be known.
        name: scope name passed to ``vscope``.

    Returns:
        ``r * c + (1 - r) * X`` with
        ``c = linear_recurrence(f, (1 - f) * x_tilde, serial=True)``.
    """
    size = X.get_shape()[-1].value

    with vscope(name):
        preact = fc_layer(X, 3 * size, nonlin=tf.identity, name='sru_pre')
        last_axis = len(preact.shape) - 1
        x_tilde, f_pre, r_pre = tf.split(preact, 3, last_axis)

        f = tf.sigmoid(f_pre)
        r = tf.sigmoid(r_pre)

        impulse = (1 - f) * x_tilde
        c = linear_recurrence(f, impulse, serial=True)

        # Blend the recurrent state with the raw input through the reset gate.
        recurrent_part = r * c
        skip_part = (1 - r) * X
        return recurrent_part + skip_part
def linear_surrogate_lstm(X, hidden_size, name='lin_sur_lstm'):
    """LSTM-like layer whose gates depend on a GILR surrogate state.

    A ``gilr_layer`` output ``h_tilde`` is concatenated with ``X`` on the
    last axis and projected to the four pre-activations f, i, o and z. The
    cell is then ``c = linear_recurrence(f, i * z)``.

    Args:
        X: input tensor.
        hidden_size: width of the surrogate state and of every gate.
        name: scope name passed to ``vscope``.

    Returns:
        ``o * c``.
    """
    with vscope(name):
        # 2 * hidden_size * n_dims params
        h_tilde = gilr_layer(X, hidden_size, nonlin=tf.tanh)

        # 4 * hidden_size * (hidden_size + n_dims)
        gate_input = tf.concat([h_tilde, X], axis=-1)
        preact = fc_layer(gate_input, 4 * hidden_size, nonlin=tf.identity)

        last_axis = len(preact.shape) - 1
        f_pre, i_pre, o_pre, z_pre = tf.split(preact, 4, last_axis)

        f = tf.sigmoid(f_pre)
        i = tf.sigmoid(i_pre)
        o = tf.sigmoid(o_pre)
        z = tf.tanh(z_pre)

        impulse = i * z
        c = linear_recurrence(f, impulse)
        return o * c
from __future__ import print_function import numpy as np import tensorflow as tf from linear_recurrent_net.tensorflow_binding import linear_recurrence n_dims = 20 n_steps = 30 np.random.seed(2016) decays = np.random.uniform(size=(n_steps, n_dims)).astype(np.float32) impulses = np.random.randn(n_steps, n_dims).astype(np.float32) initial_state = np.random.randn(n_dims).astype(np.float32) with tf.Session() as sess: inp = tf.constant(decays) response = linear_recurrence(inp, impulses, initial_state) print( 'Decays grad err:', tf.test.compute_gradient_error(inp, decays.shape, response, impulses.shape)) inp = tf.constant(impulses) response = linear_recurrence(decays, inp, initial_state) print( 'Impulses grad err:', tf.test.compute_gradient_error(inp, impulses.shape, response, impulses.shape)) inp = tf.constant(initial_state) response = linear_recurrence(decays, impulses, inp) print(
print('Decays.real grad err:', err(tf.test.compute_gradient(response, [decays.real]))) response = lambda inp: lin_rec(tf.complex(decays.real, inp), impulses, initial_state) print('Decays.imag grad err:', err(tf.test.compute_gradient(response, [decays.imag]))) response = lambda inp: lin_rec(inp, impulses, initial_state) print('Decays grad err:', err(tf.test.compute_gradient(response, [decays]))) response = lambda inp: lin_rec(decays, inp, initial_state) print('Impulses grad err:', err(tf.test.compute_gradient(response, [impulses]))) response = lambda inp: lin_rec(decays, impulses, inp) print('Initial state grad err:', err(tf.test.compute_gradient(response, [initial_state]))) if __name__ == "__main__": print("GPU vs CPU forward pass") print( relnorm(linear_recurrence(decays, impulses, initial_state), linear_recurrence_cpu(decays, impulses, initial_state))) print("GPU") run_test(linear_recurrence) print("CPU") run_test(linear_recurrence_cpu)