def test_complex_ops_shape(self):
    """Check static shape inference of shape-changing ops and activations.

    Every op is applied to a backend variable and the shape reported by
    ``K.get_shape`` is compared with the shape of the evaluated result.
    """
    x = K.variable(np.random.rand(25, 8, 12))
    y = K.variable(np.random.rand(8, 12))

    def test_func(x, func, *args, **kwargs):
        # ``assertEqual`` replaces the deprecated ``assertEquals`` alias,
        # which no longer exists in unittest since Python 3.12.
        self.assertEqual(K.get_shape(func(x, *args, **kwargs)),
                         K.eval(func(x, *args, **kwargs)).shape)

    # ====== shape manipulation ====== #
    test_func(x, K.reverse, 0)
    test_func(x, K.reverse, -1)
    test_func(x, K.repeat, 2, -1)
    test_func(x, K.dimshuffle, (2, 0, 1))
    test_func(x, K.expand_dims, 1)
    test_func(x, K.pad, [[0, 0], [2, 1], [3, 0]], 'constant')
    test_func(x, K.reshape, (-1, 12))
    # ====== activations ====== #
    test_func(y, K.antirectify)
    test_func(y, K.randrectify, 0.3, 0.8, 'auto')
    test_func(x, K.elu, 1.0)
    test_func(x, K.relu, 0.)
    test_func(x, K.tanh)
    test_func(x, K.softplus)
    test_func(y, K.softmax)
    test_func(x, K.softsign)
    test_func(x, K.linear)
    test_func(x, K.sigmoid)
    test_func(x, K.hard_sigmoid)
def test_ops(self):
    """Check static shapes of dot, transpose, eye, diag and elementwise ops."""
    x = K.variable(np.random.rand(8, 12))
    y = K.variable(np.random.rand(12, 25))
    z = K.placeholder((25, 18, 13))
    w = K.placeholder((18, 18))

    def assert_static_shape(tensor):
        # The inferred shape must match the shape of the evaluated value.
        # ``assertEqual`` is used because the ``assertEquals`` alias was
        # removed from unittest in Python 3.12.
        self.assertEqual(K.get_shape(tensor), K.eval(tensor).shape)

    # ====== dot ====== #
    t = K.dot(x, y)
    self.assertEqual(K.get_shape(t), (8, 25))
    assert_static_shape(t)
    t = K.dot(t, K.dimshuffle(z, (1, 0, 2)))
    self.assertEqual(K.get_shape(t), (8, 18, 13))
    # ====== transpose ====== #
    self.assertEqual(K.get_shape(K.transpose(z)), (13, 18, 25))
    self.assertEqual(K.get_shape(K.transpose(t, axes=(2, 0, 1))),
                     (13, 8, 18))
    # ====== eye ====== #
    assert_static_shape(K.eye(5))
    # ====== diag ====== #
    # Only the static shape is checked: `w` is a placeholder and cannot be
    # evaluated without feeding a value.
    self.assertEqual(K.get_shape(K.diag(w)), (18,))
    # ====== elementwise ====== #
    assert_static_shape(K.square(x))
    assert_static_shape(K.abs(x))
    assert_static_shape(K.sqrt(x))
    assert_static_shape(K.exp(x))
    assert_static_shape(K.log(x))
    assert_static_shape(K.round(x))
    assert_static_shape(K.pow(x, 2))
    assert_static_shape(K.clip(x, -1, 1))
    assert_static_shape(K.inv(x))
def test_simple_ops_shape(self):
    """Check static shape inference of binary ops, including broadcasting."""
    x = K.variable(np.random.rand(25, 8, 12))
    y = K.variable(18)  # scalar operand
    z = K.variable(np.random.rand(25, 8, 12))
    v = K.variable(np.random.rand(12, 8))
    w = K.variable(np.random.rand(1, 12))
    w = K.addbroadcast(w, 0)  # mark axis 0 of `w` as broadcastable

    def test_func(x, y, func):
        # ``assertEqual`` replaces the deprecated ``assertEquals`` alias,
        # which no longer exists in unittest since Python 3.12.
        self.assertEqual(K.get_shape(func(x, y)),
                         K.eval(func(x, y)).shape)

    # arithmetic against the scalar first, then the broadcastable row
    for other in (y, w):
        for op in (K.add, K.sub, K.mul, K.div, K.mod):
            test_func(x, other, op)
    test_func(x, z, K.minimum)
    test_func(x, z, K.maximum)
    # NOTE: K.concatenate is not covered here (it was disabled in the
    # original test).
    test_func(x, z, lambda *x: K.stack(x))
    test_func(v, v, K.categorical_crossentropy)
def test_confusion_matrix(self):
    """Compare ``K.confusion_matrix`` with scikit-learn's implementation."""
    from sklearn.metrics import confusion_matrix
    y1 = np.random.randint(0, 8, size=100)
    y2 = np.random.randint(0, 8, size=100)
    y_pred = K.variable(y1)
    y_true = K.variable(y2)
    confusion = K.confusion_matrix(y_pred, y_true)
    r1 = K.eval(confusion)
    # y1 holds the predictions and y2 the ground truth (see the variables
    # above); scikit-learn's positional order is (y_true, y_pred), so the
    # arguments are passed by keyword to keep the roles aligned.
    # NOTE(review): this assumes K.confusion_matrix uses rows = true labels
    # like scikit-learn -- confirm against the backend implementation.
    r2 = confusion_matrix(y_true=y2, y_pred=y1, labels=list(range(8)))
    # The matrices must match element-wise.  The previous check,
    # ``np.sum(r1 - r2) == 0``, was always true because every confusion
    # matrix over the same 100 samples sums to 100.
    self.assertTrue(np.array_equal(r1, r2))
def test_linear_algebra_value(self):
    """Regression-check shapes and checksums of ``K.dot`` / ``K.batched_dot``.

    Inputs are drawn with a fixed NumPy seed so the summed outputs can be
    compared (as truncated ``repr`` strings) with recorded values.
    """
    # ====== dot ====== #
    np.random.seed(1208)
    left = K.variable(np.random.randn(2, 4, 3))
    right = K.variable(np.random.rand(1, 2, 3, 5))
    product = K.dot(left, right)
    self.assertEqual(K.get_shape(product), (2, 4, 1, 2, 5))
    dot_checksum = repr(np.sum(K.eval(product)))
    self.assertEqual(dot_checksum[:8], "-1.0198305134529524"[:8])

    # ====== batched dot ====== #
    np.random.seed(1208)
    left = K.variable(np.random.randn(100, 3, 4, 5))
    right = K.variable(np.random.rand(100, 12, 5, 6))
    product = K.batched_dot(left, right)
    self.assertEqual(K.get_shape(product), K.eval(product).shape)
    batched_checksum = repr(K.eval(product).sum())
    self.assertEqual(batched_checksum[:7], "1655.44")
def test_save_cudnn_rnn(self):
    """Round-trip an ``N.CudnnRNN`` through ``N.serialize``/``N.deserialize``.

    The op is built and evaluated here, serialized to ``path``, then a
    script that deserializes and re-evaluates it is handed to
    ``run_script`` (presumably executed in a separate interpreter -- the
    script sets ``os.environ['ODIN']`` before importing odin; confirm).
    The script prints the number of variables, the summed weights, the
    summed biases and two output checksums, which are compared with the
    values computed locally.
    """
    np.random.seed(5218)
    X = K.variable(np.random.rand(25, 12, 8))
    num_layers = 2
    num_gates = 'lstm'
    skip_input = False
    is_bidirectional = False
    path = '/tmp/rnn'
    weights, biases = K.init_rnn(input_dim=8, hidden_dim=18, b_init=init_ops.random_normal_initializer(), num_layers=num_layers, num_gates=num_gates, skip_input=skip_input, is_bidirectional=is_bidirectional)
    rnn = N.CudnnRNN(num_units=18, W_init=weights, b_init=biases, rnn_mode=num_gates, num_layers=num_layers, skip_input=skip_input, is_bidirectional=is_bidirectional, return_states=False, dropout=0., name="CudnnRNNTest")
    y = rnn(X)
    K.initialize_all_variables()
    y = K.eval(y)
    N.serialize(nnops=rnn, path=path, binary_output=True, override=True)
    # The script re-seeds NumPy with the same value so that `X` holds the
    # same data as above; "%s" is filled with `path`.
    # NOTE(review): the literal below is reproduced exactly as it appears
    # in this (whitespace-collapsed) view; to be runnable it needs one
    # statement per line -- confirm against the original file.
    test_script = r""" from __future__ import print_function, division, absolute_import import os os.environ['ODIN'] = 'gpu,float32,seed=5218' import pickle import numpy as np import tensorflow as tf from tensorflow.python.ops import init_ops from odin.config import randint from odin import backend as K, nnet as N np.random.seed(5218) X = K.variable(np.random.rand(25, 12, 8)) rnn = N.deserialize("%s", force_restore_vars=True) y = rnn(X) K.initialize_all_variables() y = K.eval(y) print(len(rnn.variables), sum(np.sum(K.eval(i)) for i in rnn.variables if K.role.has_roles(i, K.role.Weight)), sum(np.sum(K.eval(i)) for i in rnn.variables if K.role.has_roles(i, K.role.Bias)), y.sum(), (y**2).sum()) """ % path
    # element [1] of the result is parsed as the space-separated fields
    # printed by the script
    outputs = run_script(test_script)[1]
    num_variables, w, b, s1, s2 = outputs.split(' ')
    assert int(num_variables) == len(rnn.variables)
    assert np.allclose(float(w), sum(np.sum(K.eval(i)) for i in rnn.variables if K.role.has_roles(i, K.role.Weight)))
    assert np.allclose(float(b), sum(np.sum(K.eval(i)) for i in rnn.variables if K.role.has_roles(i, K.role.Bias)))
    assert np.allclose(float(s1), y.sum())
    assert np.allclose(float(s2), (y**2).sum())
def test_variable_and_gradient(self):
    """Check that the torch and tf frameworks agree on sum and gradient.

    The same data `x` is wrapped as a trainable variable under each
    framework; the reduced sum, the gradient of ``sum(w ** 2)`` and the
    returned outputs are then compared.
    """
    with bk.framework_('torch'):
        w = bk.variable(x, trainable=True)
        s1 = bk.reduce_sum(w).detach().numpy()
        g1, o1 = bk.grad(lambda: bk.reduce_sum(bk.power(w, 2)),
                         w,
                         return_outputs=True)
    with bk.framework_('tf'):
        w = bk.variable(x, trainable=True)
        s2 = bk.reduce_sum(w).numpy()
        g2, o2 = bk.grad(lambda: bk.reduce_sum(bk.power(w, 2)),
                         w,
                         return_outputs=True)
    # ====== compare the two frameworks ====== #
    self.assertTrue(s1 == s2)
    torch_grad, tf_grad = g1[0].numpy(), g2[0].numpy()
    self.assertTrue(np.all(np.isclose(torch_grad, tf_grad)))
    torch_out, tf_out = o1[0].detach().numpy(), o2[0].numpy()
    self.assertTrue(np.all(np.isclose(torch_out, tf_out)))
def test_basic_ops_value(self):
    """Regression-check the values of basic backend ops.

    Inputs are drawn with a fixed NumPy seed; each op's evaluated output
    is summed, scaled, rounded and compared with a recorded integer.
    """
    np.random.seed(12082518)
    x = K.variable(np.random.randn(8, 8))
    y = K.variable(np.random.randn(8, 8))
    # The builtin `bool` is used instead of `np.bool`: that alias was
    # deprecated in NumPy 1.20 and removed in 1.24, and it was nothing
    # more than the builtin type.
    z = K.variable(np.random.randint(0, 2, size=(8, 8)), dtype=bool)
    w = K.variable(np.random.randint(0, 2, size=(8, 8)), dtype=bool)

    def total(tensor):
        # sum of all elements of the evaluated tensor
        return np.sum(K.eval(tensor))

    # ====== activations ====== #
    self.assertEqual(round(total(K.relu(x, alpha=0.12)) * 10000), 276733)
    self.assertEqual(round(total(K.elu(x, alpha=0.12)) * 10000), 289202)
    self.assertEqual(total(K.softmax(x)), 8.0)
    self.assertEqual(round(total(K.softplus(x)) * 10000), 554564)
    self.assertEqual(round(total(K.softsign(x)) * 100000), 211582)
    self.assertEqual(round(total(K.sigmoid(x)) * 10000), 330427)
    self.assertEqual(round(total(K.hard_sigmoid(x)) * 10000), 330836)
    self.assertEqual(round(total(K.tanh(x)) * 100000), 290165)
    # ====== elementwise math ====== #
    self.assertEqual(round(total(K.square(x)) * 10000), 744492)
    self.assertEqual(round(total(K.sqrt(x)) * 10000), 300212)
    self.assertEqual(round(total(K.abs(x)) * 10000), 559979)
    self.assertEqual(total(K.sign(x)), 6.0)
    self.assertEqual(round(total(K.inv(x)) * 1000), 495838)
    self.assertEqual(round(total(K.exp(x)) * 1000), 122062)
    self.assertEqual(round(total(K.log(K.abs(x))) * 10000), -344491)
    self.assertEqual(total(K.round(x)), 5.0)
    self.assertEqual(round(total(K.pow(x, 8)) * 100), 398153)
    self.assertEqual(round(total(K.clip(x, -0.12, 0.12)) * 1000000),
                     620529)
    # TODO: pygpu (libgpuarray) still not support diag
    # self.assertEqual(round(np.sum(K.eval(K.diag(x))) * 100000), 325289)
    self.assertEqual(total(K.eye(12, 8)), 8.0)
    # ====== comparison ====== #
    self.assertEqual(total(K.eq(z, w)), 38)
    self.assertEqual(total(K.neq(z, w)), 26)
    self.assertEqual(total(K.gt(x, y)), 33)
    self.assertEqual(total(K.ge(x, y)), 33)
    self.assertEqual(total(K.lt(x, y)), 31)
    self.assertEqual(total(K.le(x, y)), 31)
    self.assertEqual(round(total(K.switch(z, x, y)) * 100000), 139884)
def test_upsample(self):
    """Check the element sum of ``K.upsample`` output for each method."""
    X = K.variable(np.arange(1, 24 + 1).reshape(2, 2, 3, 2))
    self.assertEqual(K.eval(K.sum(X)), 300.)
    # (method, expected sum after upsampling axes 1 and 2 by a factor of 2)
    expectations = (
        ('nn', 1200.),
        ('pad_margin', 300.),
        ('repeat', 1200.),
    )
    for method, expected_sum in expectations:
        upsampled = K.upsample(X, 2, axes=(1, 2), method=method)
        self.assertEqual(K.eval(upsampled).sum(), expected_sum)
def test_computational_graph2(self):
    """Check what ``K.ComputationGraph`` collects for ``dot(X, Y) + Z``.

    The graph is expected to report two trainable variables, one
    placeholder, one update and one auxiliary variable; the compiled
    function is then run and its outputs and the value of `X` checked.
    """
    np.random.seed(1208)
    X = K.variable(np.zeros((8, 12)), name='X')
    Y = K.variable(np.random.rand(12, 8), name='Y')
    Z = K.placeholder(shape=(8, 8), name='Z')
    dot_xy = K.dot(X, Y)
    add_roles(dot_xy, Auxiliary)
    total = dot_xy + Z
    graph = K.ComputationGraph(total)
    # ====== contents of the graph ====== #
    expected_sizes = (
        ('trainable_variables', 2),
        ('placeholders', 1),
        ('updates', 1),
        ('auxiliary_variables', 1),
    )
    for attribute, size in expected_sizes:
        self.assertEqual(len(getattr(graph, attribute)), size)
    # ====== run the function ====== #
    fn = K.function(Z, [total] + graph.auxiliary_variables)
    results = fn(np.random.rand(8, 8))
    self.assertEqual(repr(np.sum(results[0]))[:5], "32.20")
    self.assertEqual(np.sum(results[1]), 0)
    self.assertEqual(np.unique(K.eval(X)).tolist(), [12.])
def test_shape(self):
    """Check output shapes of shape ops and of their ``T`` counterparts.

    For each op the evaluated shape must equal the static shape, and
    applying ``op.T`` to the op's output must give back the input shape
    ``(25, 8, 12)``.
    """
    x = K.variable(np.ones((25, 8, 12)))

    def test_func(func):
        y = func(x)
        yT = func.T(func(x))
        # ``assertEqual`` replaces the deprecated ``assertEquals`` alias,
        # which no longer exists in unittest since Python 3.12.
        self.assertEqual(K.eval(y).shape, tuple(y.shape.as_list()))
        self.assertEqual(K.eval(yT).shape, (25, 8, 12))
        self.assertEqual(K.eval(yT).shape, tuple(yT.shape.as_list()))

    test_func(N.Flatten(outdim=2))
    test_func(N.Flatten(outdim=1))
    test_func(N.Reshape((25, 4, 2, 6, 2)))
    test_func(N.Dimshuffle((2, 0, 1)))
def test_computational_graph1(self):
    """Check what ``K.ComputationGraph`` collects for a two-layer network."""
    X = K.placeholder(shape=(None, 32), name='input')
    # created but not connected to the network output
    z = K.variable(np.random.rand(10, 10), name='z')
    layers = [
        N.Dense(16, activation=K.relu),
        N.Dense(8, activation=K.softmax),
    ]
    network = N.Sequence(layers)
    y = network(X)
    add_auxiliary_variable(y, K.constant(10, name='aux_const'))
    graph = K.ComputationGraph(y)
    # ====== contents of the graph ====== #
    expected_sizes = (
        ('placeholders', 1),
        ('trainable_variables', 4),
        ('parameters', 4),
        ('dict_of_placeholders', 1),
        ('auxiliary_variables', 1),
    )
    for attribute, size in expected_sizes:
        self.assertEqual(len(getattr(graph, attribute)), size)
    graph.intermediary_variables  # no idea how to test this
    self.assertEqual(len(graph.updates), 1)
    # a graph built again from the same output compares equal
    self.assertEqual(K.ComputationGraph(y), graph)
def test_auto_infer_shape(self):
    """Check static shape inference of reduction ops.

    Each reduction is applied over single axes (with and without the third
    positional flag, presumably ``keepdims``) and, except for
    argmax/argmin, over tuples of axes.  The static shape is compared with
    the evaluated shape and with the static shape obtained from a
    placeholder whose first dimension is unknown.
    """
    x = K.variable(np.random.rand(8, 25, 12))
    y = K.placeholder((None, 25, 12))

    def test_func(func):
        # ``assertEqual`` replaces the deprecated ``assertEquals`` alias,
        # which no longer exists in unittest since Python 3.12.
        self.assertEqual(K.get_shape(func(x, 0)),
                         K.eval(func(x, 0)).shape)
        self.assertEqual(K.get_shape(func(x, -1)),
                         K.eval(func(x, -1)).shape)
        self.assertEqual(K.get_shape(func(x, 1, True)),
                         K.eval(func(x, 1, True)).shape)
        # reducing away the unknown axis must give the same static shape
        self.assertEqual(K.get_shape(func(x, 0)),
                         K.get_shape(func(y, 0)))
        self.assertEqual(K.get_shape(func(x, 0, True)),
                         K.get_shape(func(y, 0, True)))
        # argmax/argmin are not exercised with multiple axes
        if func != K.argmax and func != K.argmin:
            self.assertEqual(K.get_shape(func(x, (1, -1))),
                             K.eval(func(x, (1, -1))).shape)
            self.assertEqual(K.get_shape(func(x, (0, 1))),
                             K.eval(func(x, (0, 1))).shape)
            self.assertEqual(K.get_shape(func(x, (0, 1), True)),
                             K.eval(func(x, (0, 1), True)).shape)

    test_func(K.var)
    test_func(K.max)
    test_func(K.min)
    test_func(K.any)
    test_func(K.sum)
    test_func(K.prod)
    test_func(K.mean)
    test_func(K.std)
    # NOTE(review): K.any is exercised twice; the second occurrence was
    # possibly meant to be another reduction (e.g. K.all) -- confirm.
    test_func(K.any)
    test_func(K.argmax)
    test_func(K.argmin)
    self.assertEqual(K.get_shape(K.argsort(x)), K.eval(K.argsort(x)).shape)
def __init__(self, output_dim, max_len=10000, trainable=False, mask_zero=False):
    """Build the sinusoidal position-encoding table.

    Parameters
    ----------
    output_dim : int
        size of the encoding vector of each position
    max_len : int
        number of positions, i.e. rows of the table
    trainable : bool
        if True the table is stored as a trainable variable, otherwise as
        a constant array
    mask_zero : bool
        if True, position 0 is reserved for masking and its encoding is
        kept all-zero
    """
    super().__init__()
    self.output_dim = output_dim
    self.mask_zero = bool(mask_zero)
    self.trainable = bool(trainable)
    self.supports_masking = self.mask_zero
    self.max_len = max_len
    # angle[pos, i] = pos / 10000 ** ((i - i % 2) / output_dim)
    # (i - i % 2) creates the sequence (0, 0, 2, 2, 4, 4, ...) so that each
    # sin/cos pair of neighbouring columns shares one frequency.
    positions = np.arange(max_len)[:, np.newaxis]
    dims = np.arange(output_dim)
    rates = np.power(10000, (dims - dims % 2) / output_dim)
    position_encoding = positions / rates  # [max_len, output_dim]
    # sine on the even columns, cosine on the odd columns
    position_encoding[:, 0::2] = np.sin(position_encoding[:, 0::2])  # dim 2i
    position_encoding[:, 1::2] = np.cos(position_encoding[:, 1::2])  # dim 2i+1
    if self.mask_zero:
        # If zero-masked, don't use position 0.  The row must be cleared
        # AFTER sin/cos, otherwise cos(0) = 1 leaves the odd columns of
        # the masked position non-zero.
        position_encoding[:1] = 0.
    if not trainable:
        self.position_encoding = bk.array(position_encoding,
                                          dtype='float32',
                                          framework=self)
    else:
        self.position_encoding = bk.variable(
            initial_value=position_encoding,
            dtype='float32',
            trainable=True,
            framework=self)
def test_shape(self):
    """Check that ``K.get_shape`` reports the declared static shapes."""
    var = K.variable(np.random.rand(8, 12))
    inp = K.placeholder((None, 1, 20))
    # ``assertEqual`` replaces the deprecated ``assertEquals`` alias, which
    # no longer exists in unittest since Python 3.12.
    self.assertEqual(K.get_shape(var), (8, 12))
    # the unknown dimension is reported as None
    self.assertEqual(K.get_shape(inp), (None, 1, 20))
def test_cudnn_rnn_nnet(self):
    """Run ``N.CudnnRNN`` over a grid of configurations and check shapes.

    The grid covers direction mode, RNN mode, six kinds of initial state
    and two input modes.  For every combination the op is built, compiled
    with ``K.function`` and run on random data; the shapes of the outputs
    must equal the static shapes, with unknown dimensions replaced by
    ``batch_size``.  The test returns immediately when the device is the
    CPU.
    """
    if get_device() == 'cpu':
        return
    print()
    np.random.seed(1208)
    batch_size = 6
    hidden_size = 4
    X_linear = K.placeholder(shape=(None, 3, 8), name='X_linear')
    # for the 'skip' input mode the input feature size equals hidden_size
    X_skip = K.placeholder(shape=(None, 3, hidden_size), name='X_skip')
    for direction_mode in ['bidirectional', 'unidirectional']:
        is_bidirectional = direction_mode == 'bidirectional'
        for nb_layers in [2]:
            # leading dimension of the initial states: doubled when
            # bidirectional
            real_layers = nb_layers * 2 if is_bidirectional else nb_layers
            for rnn_mode in ['gru', 'lstm', 'rnn_relu', 'rnn_tanh']:
                # the candidate states are re-created for every rnn_mode
                for init_state, init_state_name in zip(
                    [
                        None,  # None init
                        K.init.uniform,  # function init
                        K.variable(
                            np.random.rand(real_layers, 1, hidden_size)),  # variable
                        K.variable(
                            np.random.rand(real_layers, batch_size, hidden_size)),  # variable
                        K.zeros(shape=(real_layers, 1, hidden_size)),
                        K.ones(shape=(real_layers, batch_size, hidden_size))
                    ],
                    [
                        'None', 'Function', 'Var1', 'VarB', 'Tensor1', 'TensorB'
                    ]):
                    for input_mode in ['linear', 'skip']:
                        if input_mode == 'linear':
                            X = X_linear
                            x = np.random.rand(batch_size, 3, 8)
                        else:
                            X = X_skip
                            x = np.random.rand(batch_size, 3, hidden_size)
                        # the timing covers op creation, graph building,
                        # function compilation and one execution
                        start = timeit.default_timer()
                        f = N.CudnnRNN(num_units=hidden_size, rnn_mode=rnn_mode, input_mode=input_mode, num_layers=nb_layers, direction_mode=direction_mode, params_split=False, return_states=True)
                        # perform function
                        # the same initial state is passed as both h0 and c0
                        y = f(X, h0=init_state, c0=init_state)
                        f = K.function(X, y)
                        output = f(x)
                        benchmark = timeit.default_timer() - start
                        # compare evaluated shapes with static shapes,
                        # substituting batch_size for unknown (None) dims
                        self.assertTrue([list(i.shape) for i in output] == [[
                            batch_size if j is None else j for j in K.get_shape(i)
                        ] for i in y])
                        print(
                            "*PASSED* [Layers]%s [Mode]%-8s [Input]%-6s [Direction]%-12s [State]%s [Benchmark]%.4f"
                            % (nb_layers, rnn_mode, input_mode, direction_mode, init_state_name, benchmark))
swap_memory=False, infer_shape=True, name=name) # consistent return as theano if nb_outputs == 1: outputs = outputs[0] return outputs # ====== simulate data ====== # def doit(_, x, y, z): z += K.sum(x + y) + K.sum(K.pow(_, 2)) return z sequences = [ K.placeholder(shape=(600, None)), K.variable(np.arange(0, 1200).reshape(-1, 2)), K.variable(np.arange(1200, 2400).reshape(-1, 2)) ] outputs_info = K.zeros(shape=(1200,)) X = np.random.rand(600, 3000) # ====== tf.scan ====== # y = Scan2(doit, sequences=sequences, outputs_info=outputs_info, n_steps=None, backwards=True, name=None) print('Scan:') with utils.UnitTimer():
name=name) # consistent return as theano if nb_outputs == 1: outputs = outputs[0] return outputs # ====== simulate data ====== # def doit(_, x, y, z): z += K.sum(x + y) + K.sum(K.pow(_, 2)) return z sequences = [ K.placeholder(shape=(600, None)), K.variable(np.arange(0, 1200).reshape(-1, 2)), K.variable(np.arange(1200, 2400).reshape(-1, 2)) ] outputs_info = K.zeros(shape=(1200, )) X = np.random.rand(600, 3000) # ====== tf.scan ====== # y = Scan2(doit, sequences=sequences, outputs_info=outputs_info, n_steps=None, backwards=True, name=None) print('Scan:') with utils.UnitTimer():
def create_params(self, spec, shape, name, nnops, roles=(), nb_params=1):
    """Create (or wrap) a parameter and attach it to the given NNOps.

    Parameters
    ----------
    spec: variable, numpy.ndarray, function
        specification for initializing the weights
    shape: tuple, list
        expected shape for given variable
    name: str
        name for the variable
    nnops: NNOps
        parent operator of this parameters
    roles: odin.basic.VariableRole
        categories of this variable, a single role or a tuple/list of
        roles; roles are only added to trainable variables
    nb_params: int
        number of parameters that horizontally stacked into
        given `shape` (e.g. nb_params=2, create 2 parameters with
        given `shape` and horizontally stack them into 1 parameters)
        * do NOT support when `spec` is variable.

    Returns
    -------
    the created variable or expression, which is also stored in
    ``self._variables[name]`` and set as attribute ``name`` of `nnops`

    Raises
    ------
    ValueError
        if any dimension of `shape` is non-positive
    Exception
        if `nnops` is not an NNOps, or the shapes created from the given
        specs are inconsistent
    """
    # An immutable default is used for `roles`; a list default would be a
    # single object shared by every call.
    if not isinstance(roles, (tuple, list)):
        roles = [roles]
    if not isinstance(nnops, NNOps):
        raise Exception('nnops must be instance of odin.nnet.base.NNOps')

    shape = tuple(shape)  # convert to tuple if needed
    if any(d <= 0 for d in shape):
        raise ValueError(
            ("Cannot create param with a non-positive shape dimension. "
             "Tried to create param with shape=%r, name=%r") %
            (shape, name))

    # ====== create parameters ====== #
    spec = as_tuple(spec, nb_params)
    spec = [_initialize_param(name, s, shape) for s in spec]
    # check shape returned (the last element of each initialized item)
    shape = list({i[-1] for i in spec})
    if len(shape) > 1:
        raise Exception(
            'shape are inconsitent among all given "spec", the '
            'created shape is: %s' % str(shape))
    shape = shape[0]
    # check spec returned (the first element of each initialized item)
    spec = [i[0] for i in spec]
    if isinstance(spec[0], np.ndarray):
        # arrays: stack along the last axis, then create one variable
        with K.variable_scope(nnops.name):
            spec = np.concatenate(spec, axis=-1)
            shape = spec.shape
            spec = K.variable(spec, name=name)
    elif K.is_trainable_variable(spec[0]):
        if nb_params > 1:
            # several trainable variables: stack their current values into
            # one new variable
            with K.variable_scope(nnops.name):
                spec = np.concatenate([K.get_value(i) for i in spec],
                                      axis=-1)
                shape = spec.shape
                spec = K.variable(spec, name=name)
        else:
            spec = spec[0]
    elif K.is_variable(spec[0]):
        # expressions: concatenate symbolically along the last axis
        shape = (shape[0] * nb_params,) if len(shape) == 1 \
            else shape[:-1] + (shape[-1] * nb_params,)
        spec = K.concatenate(spec, axis=-1)
    # ====== assign annotations ====== #
    # only add role for trainable variables
    for i in roles:
        if isinstance(i, VariableRole) and K.is_trainable_variable(spec):
            add_role(spec, i)
    # return actual variable or expression
    # override other parameters with same name
    self._variables[name] = spec
    # set parameter attribute for NNOps
    setattr(nnops, name, spec)
    return spec
def test_cudnn_rnn(self):
    """Cross-check odin's CuDNN RNN helpers against TensorFlow's cudnn_rnn.

    For each configuration this verifies: the size of the parameter vector,
    the canonical weight/bias shapes, the canonical-to-opaque conversion,
    and that ``N.CudnnRNN`` initialized with the parameters created by
    ``K.cudnn_rnn`` reproduces its output sums.  The test returns
    immediately when no GPU is available.
    """
    if get_ngpu() == 0:
        return
    print()
    batch_size = 2
    time_steps = 5
    input_dim = 12
    hidden_dim = 8
    X = K.variable(value=np.random.rand(batch_size, time_steps, input_dim),
                   dtype='float32',
                   name='X')

    def check_configuration(rnn_mode, num_layers, W_init, b_init,
                            bidirectional, skip_input):
        # Run all checks for a single configuration.
        input_mode = 'skip_input' if skip_input else 'linear_input'
        direction = 'bidirectional' if bidirectional else 'unidirectional'
        print('RNNmode:%s' % rnn_mode, "#Layers:%d" % num_layers,
              'Bidirectional:%s' % bidirectional,
              'SkipInput:%s' % skip_input)
        weights, biases = K.init_rnn(input_dim=input_dim,
                                     hidden_dim=hidden_dim,
                                     num_gates=rnn_mode,
                                     num_layers=num_layers,
                                     W_init=W_init,
                                     b_init=b_init,
                                     skip_input=skip_input,
                                     cudnn_vector=False,
                                     is_bidirectional=bidirectional,
                                     name=None)
        # ====== check number of params ====== #
        params1 = K.params_to_cudnn(weights, biases)
        n = params1.shape[0].value
        nb_params = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
            rnn_mode=rnn_mode,
            num_layers=num_layers,
            num_units=hidden_dim,
            input_size=input_dim,
            input_mode=input_mode,
            direction=direction)
        nb_params = K.eval(nb_params)
        assert n == nb_params
        # ====== check cannonical shape match ====== #
        kwargs = {
            'num_layers': num_layers,
            'num_units': hidden_dim,
            'input_mode': input_mode,
            'direction': direction
        }
        if rnn_mode == 'lstm':
            rnn = cudnn_rnn.CudnnLSTM(**kwargs)
        elif rnn_mode == 'gru':
            rnn = cudnn_rnn.CudnnGRU(**kwargs)
        elif rnn_mode == 'rnn_relu':
            rnn = cudnn_rnn.CudnnRNNRelu(**kwargs)
        elif rnn_mode == 'rnn_tanh':
            rnn = cudnn_rnn.CudnnRNNTanh(**kwargs)
        rnn.build(input_shape=(None, None, input_dim))
        assert len(weights) == len(rnn.canonical_weight_shapes)
        assert len(biases) == len(rnn.canonical_bias_shapes)
        for w, s in zip(weights, rnn.canonical_weight_shapes):
            assert tuple(w.shape.as_list()) == s
        # ====== check params conversion ====== #
        K.initialize_all_variables()
        params2 = cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
            rnn_mode=rnn_mode,
            num_layers=num_layers,
            num_units=hidden_dim,
            input_size=input_dim,
            input_mode=input_mode,
            direction=direction,
            weights=weights,
            biases=biases)
        assert np.all(K.eval(params1) == K.eval(params2))
        # ====== odin cudnn implementation ====== #
        # a unique scope name so only this op's variables are collected
        name = 'TEST' + uuid(length=25)
        outputs = K.cudnn_rnn(X=X,
                              num_units=hidden_dim,
                              rnn_mode=rnn_mode,
                              num_layers=num_layers,
                              parameters=None,
                              skip_input=skip_input,
                              is_bidirectional=bidirectional,
                              dropout=0.1,
                              name=name)
        K.initialize_all_variables()
        s0 = K.eval(outputs[0]).sum()
        s1 = K.eval(outputs[1]).sum()
        all_variables = K.get_all_variables(scope=name)
        new_weights = [
            i for i in all_variables
            if K.role.has_roles(i, roles=K.role.Weight)
        ]
        new_biases = [
            i for i in all_variables
            if K.role.has_roles(i, roles=K.role.Bias)
        ]
        new_weights, new_biases = K.sort_cudnn_params(new_weights,
                                                      new_biases,
                                                      rnn_mode=rnn_mode)
        # Compare against the NEW parameters: the previous asserts compared
        # each list with itself and could never fail, while `zip` below
        # silently truncates to the shorter list.
        assert len(weights) == len(new_weights)
        assert len(biases) == len(new_biases)
        for i, j in zip(weights + biases, new_weights + new_biases):
            assert i.name.split('/')[-1] == j.name.split('/')[-1]
        # ====== CudnnRNN wrapper ====== #
        rnn = N.CudnnRNN(num_units=hidden_dim,
                         W_init=new_weights,
                         b_init=new_biases,
                         rnn_mode=rnn_mode,
                         num_layers=num_layers,
                         skip_input=skip_input,
                         is_bidirectional=bidirectional,
                         return_states=True,
                         dropout=0.)
        outputs = rnn(X)
        K.initialize_all_variables()
        y0 = K.eval(outputs[0]).sum()
        y1 = K.eval(outputs[1]).sum()
        assert y0 == s0
        assert y1 == s1

    for rnn_mode in ('lstm', 'rnn_relu', 'gru'):
        for num_layers in [1, 2]:
            for W_init in [
                    init_ops.glorot_uniform_initializer(seed=1234),
                    init_ops.random_normal_initializer(seed=1234)
            ]:
                for b_init in [0, 1]:
                    for bidirectional in (True, False):
                        for skip_input in (False,):
                            check_configuration(rnn_mode, num_layers,
                                                W_init, b_init,
                                                bidirectional, skip_input)
def test_attention_models(self):
    """Smoke-test the attention layers over a grid of constructor options.

    Every combination of options is used to build a ``SelfAttention``, a
    ``LocalPredictiveAttention`` and a ``GlobalAttention``; each layer is
    called with its default alignment, then again after switching to
    ``AlignHard`` and to ``AlignRelax``.  Only successful execution is
    checked.
    """
    with bk.framework_('tf'):
        query = bk.variable(np.random.rand(n, Tq, dim).astype('float32'),
                            trainable=True)
        key = bk.variable(np.random.rand(n, Tv, dim).astype('float32'),
                          trainable=True)
        value = bk.variable(np.random.rand(n, Tv, dim).astype('float32'),
                            trainable=True)
        q_mask = np.random.randint(0, 2, size=(n, Tq)).astype('int32')
        v_mask = np.random.randint(0, 2, size=(n, Tv)).astype('int32')
        # ====== option groups ====== #
        head_options = [
            dict(num_heads=0,
                 heads_depth=1,
                 heads_bias=True,
                 heads_regularization=0.5,
                 heads_activation='linear'),
            dict(num_heads=5,
                 heads_depth=2,
                 heads_bias=True,
                 heads_regularization=0.5,
                 heads_activation='linear'),
        ]
        scale_options = [
            dict(scale_initializer='vaswani',
                 scale_tied=True,
                 scale_trainable=False),
            dict(scale_initializer='ones',
                 scale_tied=False,
                 scale_trainable=True),
        ]
        hard_options = [
            dict(sample_shape=1,
                 temperature=0.5,
                 temperature_trainable=False),
            dict(sample_shape=5,
                 temperature=1.0,
                 temperature_trainable=True),
        ]
        # ====== every combination of options ====== #
        all_kw = []
        for causal in (True, False):
            for residual in (True, False):
                for dropout in (0.0, 0.3):
                    for temporal_dropout in (True, False):
                        for heads in head_options:
                            for scales in scale_options:
                                for hards in hard_options:
                                    all_kw.append(
                                        dict(causal=causal,
                                             residual=residual,
                                             dropout=dropout,
                                             temporal_dropout=temporal_dropout,
                                             **heads,
                                             **scales,
                                             **hards))

        # ====== run the layers ====== #
        def exercise(layer_class, make_inputs, options):
            # default alignment first, then hard and relaxed alignment
            att = layer_class(dim, **options)
            y, a = att(make_inputs(),
                       mask=(q_mask, v_mask),
                       return_attention=True)
            for alignment in (net.attention_mechanism.AlignHard,
                              net.attention_mechanism.AlignRelax):
                att.set_methods(alignment=alignment)
                y, a = att(make_inputs(),
                           mask=(q_mask, v_mask),
                           return_attention=True)

        for kw in tqdm(all_kw):
            exercise(net.SelfAttention, lambda: query, kw)
            exercise(net.LocalPredictiveAttention, lambda: [query, value], kw)
            exercise(net.GlobalAttention, lambda: [query, value], kw)
def test_attention(self):
    """Smoke-test every combination of attention mechanism flags.

    The mechanism is the union (``|``) of an alignment, a scoring, an
    input and a position method.  For each combination -- with and
    without multi-head projections -- the inputs are prepared, scored
    and aligned under a gradient tape, and gradients w.r.t. query, key
    and value are requested.  Combinations raising ``NotImplementedError``
    are reported and skipped; nothing else is asserted.
    """
    with bk.framework_('tf'):
        query = bk.variable(np.random.rand(n, Tq, dim).astype('float32'), trainable=True)
        key = bk.variable(np.random.rand(n, Tv, dim).astype('float32'), trainable=True)
        value = bk.variable(np.random.rand(n, Tv, dim).astype('float32'), trainable=True)
        q_mask = np.random.randint(0, 2, size=(n, Tq)).astype('int32')
        v_mask = np.random.randint(0, 2, size=(n, Tv)).astype('int32')
        # projections handed to `am.score` below
        proj_1 = bk.nn.Dense(1)
        proj_D = bk.nn.Dense(dim)
        proj_V = bk.nn.Dense(1)
        scale = [1. / np.sqrt(dim)] * dim
        num_heads = 2
        q_heads = create_attention_heads(input_dim=query.shape[-1], num_heads=num_heads, depth=2)
        k_heads = create_attention_heads(input_dim=key.shape[-1], num_heads=num_heads, depth=2)
        v_heads = create_attention_heads(input_dim=value.shape[-1], num_heads=num_heads, depth=2)
        # first pass uses the head projections, second pass uses none
        for heads in [[q_heads, k_heads, v_heads], [None, None, None]]:
            for input_method in (Inter, Intra):
                print()
                for position in (PosLocalM, PosLocalP, PosGlobal):
                    for align_method in (AlignRelax, AlignHard, AlignSoft):
                        for score_method in (ScoreLocation, ScoreAdditive, ScoreDotProd, ScoreCosine, ScoreGeneral):
                            am = align_method | score_method | input_method | position
                            am.validate()
                            print(am)
                            try:
                                q, k, v, qm, vm = am.prepare(
                                    query, key, value, (q_mask, v_mask))
                                # apply a head projection only when both the
                                # input and the projection exist
                                q, k, v = [
                                    i if i is None or j is None else j(i)
                                    for i, j in zip([q, k, v], heads)
                                ]
                                with bk.GradientTape() as tape:
                                    # location-based scoring uses the 1-unit
                                    # query projection, others the dim-unit one
                                    scores = am.score(
                                        q, k, scale=scale, window_width=None,
                                        q_proj=proj_1 if ScoreLocation in am else proj_D,
                                        target_proj=proj_V)
                                    P = am.normalize(scores)
                                    # fall back to the query as value when
                                    # `prepare` returned no value
                                    out, dist = am.align(
                                        scores, q if v is None else v, query=q, v_mask=vm, q_mask=qm,
                                        causal=True, residual=True, dropout=0.3, training=True, sample_shape=2)
                                grads = bk.grad(out, [query, key, value], tape=tape)
                                # for name, x, g in zip(["Query", "Key", "Value"], [q, k, v],
                                #                       grads):
                                #   print(" %s" % name)
                                #   print(" -", None if x is None else x.shape)
                                #   print(" -", None if g is None else
                                #         (g.shape, bk.norm(g).numpy()))
                                # print(" Output:", out.shape)
                                # print(" Attention Scores:", scores.shape)
                                # print(" Attention Dist :",
                                #       dist if isinstance(dist, bay.Distribution) else dist.shape)
                            except NotImplementedError as e:
                                print("no support!", e)
def __init__(self, input_dim, causal=False, residual=True, dropout=0, temporal_dropout=False, num_heads=0, heads_depth=1, heads_bias=True, heads_regularization=0., heads_activation='linear', scale_initializer='vaswani', scale_tied=True, scale_trainable=False, sample_shape=1, temperature=0.5, temperature_trainable=False, name=None):
    """Store the attention configuration and create its parameters.

    Parameters
    ----------
    input_dim : int or None
        input feature dimension; required when `scale_tied` is False or
        when `scale_initializer` is 'vaswani'
    causal, residual : bool
        stored as boolean flags
    dropout, temporal_dropout
        dropout configuration, stored on the instance
    num_heads, heads_depth, heads_bias, heads_regularization, heads_activation
        multi-head configuration; `heads_bias` and `heads_activation` are
        expanded to one entry per head layer (`heads_depth` entries)
    scale_initializer : str, number, initializer or None
        'vaswani' is replaced by ``1 / sqrt(input_dim)``; None leaves the
        scale as the constant 1
    scale_tied : bool
        if True the initializer is called with shape ``()``, otherwise
        with ``nest.flatten(input_dim)``
    scale_trainable : bool
        whether the scale variable is trainable
    sample_shape, temperature, temperature_trainable
        configuration for hard attention
    name : str or None
        passed to the parent constructor

    Raises
    ------
    ValueError
        if `scale_tied` is False and `input_dim` is None
    """
    super(Attention, self).__init__(name=name)
    self.input_dim = input_dim
    self.causal = bool(causal)
    self.residual = bool(residual)
    # ====== for dropout ====== #
    self.dropout = dropout
    self.temporal_dropout = bool(temporal_dropout)
    # ====== for hard attention ====== #
    self.sample_shape = int(sample_shape)
    self.temperature_trainable = temperature_trainable
    self.temperature = bk.variable(initial_value=temperature, trainable=temperature_trainable, dtype='float32', framework=self)
    # ====== multi-head ====== #
    self.num_heads = int(num_heads)
    self.heads_regularization = heads_regularization
    self.heads_depth = int(heads_depth)
    self.heads_bias = as_tuple(heads_bias, N=self.heads_depth, t=bool)
    self.heads_activation = as_tuple(heads_activation, N=self.heads_depth)
    # ====== initialize scale ====== #
    self.scale_initializer = scale_initializer
    self.scale_tied = scale_tied
    self.scale_trainable = scale_trainable
    if not scale_tied and input_dim is None:
        raise ValueError(
            "If scale_tied=False, the input_dim must be provided.")
    # constant scale, used when no initializer is given
    scale = 1
    if scale_initializer is not None:
        if isinstance(scale_initializer, string_types):
            scale_initializer = scale_initializer.lower().strip()
            if scale_initializer == 'vaswani':
                assert input_dim is not None, \
                    "input_dim must be provided if scale_initializer='vaswani'"
                # 'vaswani' means scaling by 1 / sqrt(input_dim)
                scale_initializer = 1 / input_dim**0.5
        scale = bk.parse_initializer(scale_initializer, self)
        if scale_tied:
            # the initializer is called with an empty shape
            scale = bk.variable(initial_value=scale(()), trainable=scale_trainable, framework=self)
        else:
            scale = bk.variable(initial_value=scale(
                nest.flatten(input_dim)), trainable=scale_trainable, framework=self)
    self.scale = scale
    # ====== init parameters and layers ====== #
    # query, key and value heads are created with identical settings
    with bk.framework_(self):
        self.query_heads = create_attention_heads(
            input_dim, num_heads=self.num_heads, depth=self.heads_depth, use_bias=self.heads_bias, activation=self.heads_activation)
        self.key_heads = create_attention_heads(
            input_dim, num_heads=self.num_heads, depth=self.heads_depth, use_bias=self.heads_bias, activation=self.heads_activation)
        self.value_heads = create_attention_heads(
            input_dim, num_heads=self.num_heads, depth=self.heads_depth, use_bias=self.heads_bias, activation=self.heads_activation)
    # init default object
    self._mechanism = Inter | PosGlobal | AlignSoft | ScoreLocation
    # query projection for location-based scoring method
    self.location_proj = None
    # target projection use in Local Predictive attention
    self.target_proj = None
    # self._local_init()
    self.set_methods()
def test_variable_creation(self):
    """Create variables from every supported kind of `value`.

    Covered: numpy array, name of an existing variable, function,
    initializer, number, tensor and tensor name.  Finally every created
    name must resolve to exactly one variable, all of them distinct.
    """
    np.random.seed(5218)
    # ====== create by numpy array ====== #
    array_value = np.random.rand(12, 8).astype('float32')
    K.variable(value=array_value, dtype='float32', name='x', initialize=True)
    fetched = K.eval(K.variable(name='x'))
    self.assertTrue(np.all(fetched == array_value))
    # ====== create by Variable name ====== #
    K.variable(value='x', name='z', initialize=True)
    fetched = K.eval(K.variable(name='z'))
    self.assertTrue(np.all(fetched == array_value))

    # ====== create by function ====== #
    def fn(shape):
        return np.full(shape=shape, fill_value=8)

    y = K.variable(value=fn,
                   shape=(12, 18),
                   dtype='float32',
                   name='y',
                   initialize=True)
    expected_y = np.full(shape=(12, 18), fill_value=8)
    self.assertTrue(np.all(K.eval(y) == expected_y))
    # ====== create by initializer ====== #
    # an initializer with the same seed gives the reference value
    expected_w = K.eval(
        init_ops.orthogonal_initializer(seed=5218)(shape=(8, 8)))
    w = K.variable(value=init_ops.orthogonal_initializer(seed=5218),
                   shape=(8, 8),
                   dtype='float32',
                   name='w',
                   initialize=True)
    self.assertTrue(np.all(K.eval(w) == expected_w))
    # ====== create by number ====== #
    K.variable(value=25,
               shape=(8, 8),
               dtype='float32',
               name='a',
               initialize=True)
    self.assertTrue(K.eval(K.variable(name='a')).sum() == 25 * 8 * 8)
    # ====== create by tensor ====== #
    t = tf.constant(value=3,
                    shape=(12, 8),
                    dtype='float32',
                    name='dummy_constant')
    K.variable(value=t, name='b', initialize=True)
    fetched = K.eval(K.variable(name='b'))
    self.assertTrue(np.all(fetched == K.eval(t)))
    # ====== create by Tensor name ====== #
    K.variable(value='dummy_constant', name='c', initialize=True)
    fetched = K.eval(K.variable(name='c'))
    self.assertTrue(np.all(fetched == K.eval(t)))
    # ====== check all variable exist ====== #
    all_variables_name = ['x', 'z', 'y', 'w', 'a', 'b', 'c']
    all_variables = []
    for name in all_variables_name:
        matches = K.get_all_variables(name=name)
        assert len(matches) == 1, name
        all_variables.append(matches[0])
    # check no duplicate variables
    self.assertTrue(len(set(all_variables)) == len(all_variables_name))