Example #1
    def test_complex_ops_shape(self):
        x = K.variable(np.random.rand(25, 8, 12))
        y = K.variable(np.random.rand(8, 12))

        def test_func(x, func, *args, **kwargs):
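            # the statically inferred shape must equal the shape of the evaluated result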
            self.assertEquals(K.get_shape(func(x, *args, **kwargs)),
                              K.eval(func(x, *args, **kwargs)).shape)

        test_func(x, K.reverse, 0)
        test_func(x, K.reverse, -1)
        test_func(x, K.repeat, 2, -1)
        test_func(x, K.dimshuffle, (2, 0, 1))
        test_func(x, K.expand_dims, 1)
        test_func(x, K.pad, [[0, 0], [2, 1], [3, 0]], 'constant')
        test_func(x, K.reshape, (-1, 12))

        test_func(y, K.antirectify)
        test_func(y, K.randrectify, 0.3, 0.8, 'auto')
        test_func(x, K.elu, 1.0)
        test_func(x, K.relu, 0.)
        test_func(x, K.tanh)
        test_func(x, K.softplus)
        test_func(y, K.softmax)
        test_func(x, K.softsign)
        test_func(x, K.linear)
        test_func(x, K.sigmoid)
        test_func(x, K.hard_sigmoid)
Example #2
    def test_ops(self):
        x = K.variable(np.random.rand(8, 12))
        y = K.variable(np.random.rand(12, 25))
        z = K.placeholder((25, 18, 13))
        w = K.placeholder((18, 18))

        # ====== dot ====== #
        t = K.dot(x, y)
        self.assertEquals(K.get_shape(t), (8, 25))
        self.assertEquals(K.get_shape(t), K.eval(t).shape)
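        # dot with the dimshuffled 3-D tensor (18, 25, 13) gives a (8, 18, 13) result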
        t = K.dot(t, K.dimshuffle(z, (1, 0, 2)))
        self.assertEquals(K.get_shape(t), (8, 18, 13))

        # ====== transpose ====== #
        self.assertEquals(K.get_shape(K.transpose(z)), (13, 18, 25))
        self.assertEquals(K.get_shape(K.transpose(t, axes=(2, 0, 1))),
                          (13, 8, 18))

        # ====== eye ====== #
        self.assertEquals(K.get_shape(K.eye(5)), K.eval(K.eye(5)).shape)
        # ====== diag ====== #
        self.assertEquals(K.get_shape(K.diag(w)), (18, ))
        # self.assertEquals(K.get_shape(K.diag(x)),
        # K.eval(K.diag(y)).shape)
        self.assertEquals(K.get_shape(K.square(x)), K.eval(K.square(x)).shape)
        self.assertEquals(K.get_shape(K.abs(x)), K.eval(K.abs(x)).shape)
        self.assertEquals(K.get_shape(K.sqrt(x)), K.eval(K.sqrt(x)).shape)
        self.assertEquals(K.get_shape(K.exp(x)), K.eval(K.exp(x)).shape)
        self.assertEquals(K.get_shape(K.log(x)), K.eval(K.log(x)).shape)
        self.assertEquals(K.get_shape(K.round(x)), K.eval(K.round(x)).shape)
        self.assertEquals(K.get_shape(K.pow(x, 2)), K.eval(K.pow(x, 2)).shape)
        self.assertEquals(K.get_shape(K.clip(x, -1, 1)),
                          K.eval(K.clip(x, -1, 1)).shape)
        self.assertEquals(K.get_shape(K.inv(x)), K.eval(K.inv(x)).shape)
Example #3
    def test_simple_ops_shape(self):
        x = K.variable(np.random.rand(25, 8, 12))
        y = K.variable(18)
        z = K.variable(np.random.rand(25, 8, 12))
        v = K.variable(np.random.rand(12, 8))
        w = K.variable(np.random.rand(1, 12))
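        # mark axis 0 of w as broadcastable so the (1, 12) variable can broadcast against x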
        w = K.addbroadcast(w, 0)

        def test_func(x, y, func):
            self.assertEquals(K.get_shape(func(x, y)),
                              K.eval(func(x, y)).shape)

        test_func(x, y, K.add)
        test_func(x, y, K.sub)
        test_func(x, y, K.mul)
        test_func(x, y, K.div)
        test_func(x, y, K.mod)

        test_func(x, w, K.add)
        test_func(x, w, K.sub)
        test_func(x, w, K.mul)
        test_func(x, w, K.div)
        test_func(x, w, K.mod)

        test_func(x, z, K.minimum)
        test_func(x, z, K.maximum)

        # test_func(x, z, K.concatenate)
        test_func(x, z, lambda *x: K.stack(x))

        test_func(v, v, K.categorical_crossentropy)
Example #4
    def test_confusion_matrix(self):
        from sklearn.metrics import confusion_matrix
        y1 = np.random.randint(0, 8, size=100)
        y2 = np.random.randint(0, 8, size=100)
        y_pred = K.variable(y1)
        y_true = K.variable(y2)
        confusion = K.confusion_matrix(y_pred, y_true)
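        # the backend result is compared below against scikit-learn's reference implementation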

        r1 = K.eval(confusion)
        r2 = confusion_matrix(y1, y2)
        self.assertEqual(np.sum(r1 - r2), 0.)
Example #5
    def test_linear_algebra_value(self):
        np.random.seed(1208)
        x = K.variable(np.random.randn(2, 4, 3))
        y = K.variable(np.random.rand(1, 2, 3, 5))

        z = K.dot(x, y)
        self.assertEqual(K.get_shape(z), (2, 4, 1, 2, 5))
        self.assertEqual(
            repr(np.sum(K.eval(z)))[:8], "-1.0198305134529524"[:8])

        np.random.seed(1208)
        x = K.variable(np.random.randn(100, 3, 4, 5))
        y = K.variable(np.random.rand(100, 12, 5, 6))
        z = K.batched_dot(x, y)
        self.assertEqual(K.get_shape(z), K.eval(z).shape)
        self.assertEqual(repr(K.eval(z).sum())[:7], "1655.44")
Example #6
 def test_save_cudnn_rnn(self):
   np.random.seed(5218)
   X = K.variable(np.random.rand(25, 12, 8))
   num_layers = 2
   num_gates = 'lstm'
   skip_input = False
   is_bidirectional = False
   path = '/tmp/rnn'
   weights, biases = K.init_rnn(input_dim=8, hidden_dim=18,
                                b_init=init_ops.random_normal_initializer(),
                                num_layers=num_layers, num_gates=num_gates,
                                skip_input=skip_input,
                                is_bidirectional=is_bidirectional)
   rnn = N.CudnnRNN(num_units=18,
                    W_init=weights, b_init=biases,
                    rnn_mode=num_gates, num_layers=num_layers,
                    skip_input=skip_input, is_bidirectional=is_bidirectional,
                    return_states=False,
                    dropout=0., name="CudnnRNNTest")
   y = rnn(X)
   K.initialize_all_variables()
   y = K.eval(y)
   N.serialize(nnops=rnn, path=path, binary_output=True, override=True)
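   # the script below deserializes the saved RNN in a fresh process and prints the
   # variable count, weight/bias sums and output statistics for comparison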
   test_script = r"""
   from __future__ import print_function, division, absolute_import
   import os
   os.environ['ODIN'] = 'gpu,float32,seed=5218'
   import pickle
   import numpy as np
   import tensorflow as tf
   from tensorflow.python.ops import init_ops
   from odin.config import randint
   from odin import backend as K, nnet as N
   np.random.seed(5218)
   X = K.variable(np.random.rand(25, 12, 8))
   rnn = N.deserialize("%s", force_restore_vars=True)
   y = rnn(X)
   K.initialize_all_variables()
   y = K.eval(y)
   print(len(rnn.variables),
         sum(np.sum(K.eval(i)) for i in rnn.variables
                   if K.role.has_roles(i, K.role.Weight)),
         sum(np.sum(K.eval(i)) for i in rnn.variables
             if K.role.has_roles(i, K.role.Bias)),
         y.sum(),
         (y**2).sum())
   """ % path
   outputs = run_script(test_script)[1]
   num_variables, w, b, s1, s2 = outputs.split(' ')
   assert int(num_variables) == len(rnn.variables)
   assert np.allclose(float(w),
                      sum(np.sum(K.eval(i)) for i in rnn.variables
                          if K.role.has_roles(i, K.role.Weight)))
   assert np.allclose(float(b),
                      sum(np.sum(K.eval(i)) for i in rnn.variables
                          if K.role.has_roles(i, K.role.Bias)))
   assert np.allclose(float(s1), y.sum())
   assert np.allclose(float(s2), (y**2).sum())
Example #7
  def test_variable_and_gradient(self):
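    # compute sum(w) and the gradient of sum(w**2) in both backends; the results must match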
    with bk.framework_('torch'):
      w = bk.variable(x, trainable=True)
      s1 = bk.reduce_sum(w).detach().numpy()
      g1, o1 = bk.grad(lambda: bk.reduce_sum(bk.power(w, 2)),
                       w,
                       return_outputs=True)

    with bk.framework_('tf'):
      w = bk.variable(x, trainable=True)
      s2 = bk.reduce_sum(w).numpy()
      g2, o2 = bk.grad(lambda: bk.reduce_sum(bk.power(w, 2)),
                       w,
                       return_outputs=True)

    self.assertTrue(s1 == s2)
    self.assertTrue(np.all(np.isclose(g1[0].numpy(), g2[0].numpy())))
    self.assertTrue(np.all(np.isclose(o1[0].detach().numpy(), o2[0].numpy())))
Example #8
    def test_basic_ops_value(self):
        np.random.seed(12082518)
        x = K.variable(np.random.randn(8, 8))
        y = K.variable(np.random.randn(8, 8))
        z = K.variable(np.random.randint(0, 2, size=(8, 8)), dtype=np.bool)
        w = K.variable(np.random.randint(0, 2, size=(8, 8)), dtype=np.bool)
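        # each op's summed output is checked against a pre-computed constant (seed fixed above)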

        self.assertEqual(round(np.sum(K.eval(K.relu(x, alpha=0.12))) * 10000),
                         276733)
        self.assertEqual(round(np.sum(K.eval(K.elu(x, alpha=0.12))) * 10000),
                         289202)
        self.assertEqual(np.sum(K.eval(K.softmax(x))), 8.0)
        self.assertEqual(round(np.sum(K.eval(K.softplus(x))) * 10000), 554564)
        self.assertEqual(round(np.sum(K.eval(K.softsign(x))) * 100000), 211582)
        self.assertEqual(round(np.sum(K.eval(K.sigmoid(x))) * 10000), 330427)
        self.assertEqual(round(np.sum(K.eval(K.hard_sigmoid(x))) * 10000),
                         330836)
        self.assertEqual(round(np.sum(K.eval(K.tanh(x))) * 100000), 290165)
        self.assertEqual(round(np.sum(K.eval(K.square(x))) * 10000), 744492)
        self.assertEqual(round(np.sum(K.eval(K.sqrt(x))) * 10000), 300212)
        self.assertEqual(round(np.sum(K.eval(K.abs(x))) * 10000), 559979)
        self.assertEqual(np.sum(K.eval(K.sign(x))), 6.0)
        self.assertEqual(round(np.sum(K.eval(K.inv(x))) * 1000), 495838)
        self.assertEqual(round(np.sum(K.eval(K.exp(x))) * 1000), 122062)
        self.assertEqual(round(np.sum(K.eval(K.log(K.abs(x)))) * 10000),
                         -344491)
        self.assertEqual(np.sum(K.eval(K.round(x))), 5.0)
        self.assertEqual(round(np.sum(K.eval(K.pow(x, 8))) * 100), 398153)
        self.assertEqual(
            round(np.sum(K.eval(K.clip(x, -0.12, 0.12))) * 1000000), 620529)
        # TODO: pygpu (libgpuarray) still not support diag
        # self.assertEqual(round(np.sum(K.eval(K.diag(x))) * 100000), 325289)
        self.assertEqual(np.sum(K.eval(K.eye(12, 8))), 8.0)

        self.assertEqual(np.sum(K.eval(K.eq(z, w))), 38)
        self.assertEqual(np.sum(K.eval(K.neq(z, w))), 26)
        self.assertEqual(np.sum(K.eval(K.gt(x, y))), 33)
        self.assertEqual(np.sum(K.eval(K.ge(x, y))), 33)
        self.assertEqual(np.sum(K.eval(K.lt(x, y))), 31)
        self.assertEqual(np.sum(K.eval(K.le(x, y))), 31)
        self.assertEqual(round(np.sum(K.eval(K.switch(z, x, y))) * 100000),
                         139884)
Example #9
 def test_upsample(self):
     X = K.variable(np.arange(1, 24 + 1).reshape(2, 2, 3, 2))
     self.assertEqual(K.eval(K.sum(X)), 300.)
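     # 'nn' and 'repeat' duplicate every entry twice along both axes (sum x4),
     # while 'pad_margin' only inserts zeros (sum unchanged)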
     self.assertEqual(
         K.eval(K.upsample(X, 2, axes=(1, 2), method='nn')).sum(), 1200.)
     self.assertEqual(
         K.eval(K.upsample(X, 2, axes=(1, 2), method='pad_margin')).sum(),
         300.)
     self.assertEqual(
         K.eval(K.upsample(X, 2, axes=(1, 2), method='repeat')).sum(),
         1200.)
Example #10
    def test_computational_graph2(self):
        np.random.seed(1208)

        X = K.variable(np.zeros((8, 12)), name='X')
        Y = K.variable(np.random.rand(12, 8), name='Y')
        Z = K.placeholder(shape=(8, 8), name='Z')
        a = K.dot(X, Y)
        add_roles(a, Auxiliary)
        a = a + Z
        g1 = K.ComputationGraph(a)

        self.assertEqual(len(g1.trainable_variables), 2)
        self.assertEqual(len(g1.placeholders), 1)
        self.assertEqual(len(g1.updates), 1)
        self.assertEqual(len(g1.auxiliary_variables), 1)

        f = K.function(Z, [a] + g1.auxiliary_variables)

        output = f(np.random.rand(8, 8))
        self.assertEqual(repr(np.sum(output[0]))[:5], "32.20")
        self.assertEqual(np.sum(output[1]), 0)
        self.assertEqual(np.unique(K.eval(X)).tolist(), [12.])
Example #11
    def test_shape(self):
        x = K.variable(np.ones((25, 8, 12)))

        def test_func(func):
            y = func(x)
            yT = func.T(func(x))
            self.assertEquals(K.eval(y).shape, tuple(y.shape.as_list()))
            self.assertEquals(K.eval(yT).shape, (25, 8, 12))
            self.assertEquals(K.eval(yT).shape, tuple(yT.shape.as_list()))

        test_func(N.Flatten(outdim=2))
        test_func(N.Flatten(outdim=1))
        test_func(N.Reshape((25, 4, 2, 6, 2)))
        test_func(N.Dimshuffle((2, 0, 1)))
Example #12
    def test_computational_graph1(self):
        X = K.placeholder(shape=(None, 32), name='input')
        z = K.variable(np.random.rand(10, 10), name='z')
        f = N.Sequence(
            [N.Dense(16, activation=K.relu),
             N.Dense(8, activation=K.softmax)])
        y = f(X)
        add_auxiliary_variable(y, K.constant(10, name='aux_const'))

        tmp = K.ComputationGraph(y)
        self.assertEqual(len(tmp.placeholders), 1)
        self.assertEqual(len(tmp.trainable_variables), 4)
        self.assertEqual(len(tmp.parameters), 4)
        self.assertEqual(len(tmp.dict_of_placeholders), 1)
        self.assertEqual(len(tmp.auxiliary_variables), 1)
        tmp.intermediary_variables  # no idea how to test this
        self.assertEqual(len(tmp.updates), 1)
        self.assertEqual(K.ComputationGraph(y), tmp)
Example #13
    def test_auto_infer_shape(self):
        x = K.variable(np.random.rand(8, 25, 12))
        y = K.placeholder((None, 25, 12))

        def test_func(func):
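            # the inferred static shape must match the evaluated shape for single axes,
            # negative axes and keepdims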
            self.assertEquals(K.get_shape(func(x, 0)),
                              K.eval(func(x, 0)).shape)
            self.assertEquals(K.get_shape(func(x, -1)),
                              K.eval(func(x, -1)).shape)
            self.assertEquals(K.get_shape(func(x, 1, True)),
                              K.eval(func(x, 1, True)).shape)

            self.assertEquals(K.get_shape(func(x, 0)), K.get_shape(func(y, 0)))
            self.assertEquals(K.get_shape(func(x, 0, True)),
                              K.get_shape(func(y, 0, True)))

            if func != K.argmax and func != K.argmin:
                self.assertEquals(K.get_shape(func(x, (1, -1))),
                                  K.eval(func(x, (1, -1))).shape)
                self.assertEquals(K.get_shape(func(x, (0, 1))),
                                  K.eval(func(x, (0, 1))).shape)
                self.assertEquals(K.get_shape(func(x, (0, 1), True)),
                                  K.eval(func(x, (0, 1), True)).shape)

        test_func(K.var)
        test_func(K.max)
        test_func(K.min)
        test_func(K.any)
        test_func(K.sum)
        test_func(K.prod)
        test_func(K.mean)
        test_func(K.std)
        test_func(K.any)
        test_func(K.argmax)
        test_func(K.argmin)

        self.assertEquals(K.get_shape(K.argsort(x)),
                          K.eval(K.argsort(x)).shape)
Example #14
    def __init__(self,
                 output_dim,
                 max_len=10000,
                 trainable=False,
                 mask_zero=False):
        super().__init__()
        self.output_dim = output_dim
        self.mask_zero = bool(mask_zero)
        self.trainable = bool(trainable)
        self.supports_masking = mask_zero
        self.max_len = max_len

        # Apply sin to the even columns and cos to the odd columns.
        # If zero-masked, don't use position 0.
        # (i - i % 2) creates the sequence (0, 0, 2, 2, 4, 4, ...) so that the
        # sin (even) and cos (odd) columns share the same frequency.
        position_encoding = np.array([[
            pos / np.power(10000, (i - i % 2) / output_dim)
            for i in range(output_dim)
        ] if pos != 0 or not mask_zero else [0.] * output_dim
                                      for pos in range(max_len)])
        # [max_len, output_dim]
        position_encoding[:, 0::2] = np.sin(position_encoding[:,
                                                              0::2])  # dim 2i
        position_encoding[:,
                          1::2] = np.cos(position_encoding[:,
                                                           1::2])  # dim 2i+1
        if not trainable:
            self.position_encoding = bk.array(position_encoding,
                                              dtype='float32',
                                              framework=self)
        else:
            self.position_encoding = bk.variable(
                initial_value=position_encoding,
                dtype='float32',
                trainable=True,
                framework=self)
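
For reference, the table built above is the standard sinusoidal positional encoding: PE[pos, 2i] = sin(pos / 10000^(2i/dim)) and PE[pos, 2i+1] = cos(pos / 10000^(2i/dim)). Below is a minimal numpy-only sketch of the same construction (the function name is illustrative and not part of the odin API; the mask_zero special case is omitted):

import numpy as np

def sinusoid_encoding(max_len, dim):
    # pos / 10000 ** ((i - i % 2) / dim): (i - i % 2) pairs even/odd columns
    # so that the sin and cos columns share the same frequency
    pos = np.arange(max_len, dtype='float64')[:, None]   # [max_len, 1]
    i = np.arange(dim)[None, :]                          # [1, dim]
    table = pos / np.power(10000.0, (i - i % 2) / dim)   # [max_len, dim]
    table[:, 0::2] = np.sin(table[:, 0::2])              # even columns -> sin
    table[:, 1::2] = np.cos(table[:, 1::2])              # odd columns  -> cos
    return table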
Example #15
 def test_shape(self):
     var = K.variable(np.random.rand(8, 12))
     inp = K.placeholder((None, 1, 20))
     self.assertEquals(K.get_shape(var), (8, 12))
     self.assertEquals(K.get_shape(inp), (None, 1, 20))
Example #16
 def test_cudnn_rnn_nnet(self):
     if get_device() == 'cpu':
         return
     print()
     np.random.seed(1208)
     batch_size = 6
     hidden_size = 4
     X_linear = K.placeholder(shape=(None, 3, 8), name='X_linear')
     X_skip = K.placeholder(shape=(None, 3, hidden_size), name='X_skip')
     for direction_mode in ['bidirectional', 'unidirectional']:
         is_bidirectional = direction_mode == 'bidirectional'
         for nb_layers in [2]:
             real_layers = nb_layers * 2 if is_bidirectional else nb_layers
             for rnn_mode in ['gru', 'lstm', 'rnn_relu', 'rnn_tanh']:
                 for init_state, init_state_name in zip(
                     [
                         None,  # None init
                         K.init.uniform,  # function init
                         K.variable(
                             np.random.rand(real_layers, 1,
                                            hidden_size)),  # variable
                         K.variable(
                             np.random.rand(real_layers, batch_size,
                                            hidden_size)),  # variable
                         K.zeros(shape=(real_layers, 1, hidden_size)),
                         K.ones(shape=(real_layers, batch_size,
                                       hidden_size))
                     ],
                     [
                         'None', 'Function', 'Var1', 'VarB', 'Tensor1',
                         'TensorB'
                     ]):
                     for input_mode in ['linear', 'skip']:
                         if input_mode == 'linear':
                             X = X_linear
                             x = np.random.rand(batch_size, 3, 8)
                         else:
                             X = X_skip
                             x = np.random.rand(batch_size, 3, hidden_size)
                         start = timeit.default_timer()
                         f = N.CudnnRNN(num_units=hidden_size,
                                        rnn_mode=rnn_mode,
                                        input_mode=input_mode,
                                        num_layers=nb_layers,
                                        direction_mode=direction_mode,
                                        params_split=False,
                                        return_states=True)
                         # perform function
                         y = f(X, h0=init_state, c0=init_state)
                         f = K.function(X, y)
                         output = f(x)
                         benchmark = timeit.default_timer() - start
                         self.assertTrue([list(i.shape)
                                          for i in output] == [[
                                              batch_size if j is None else j
                                              for j in K.get_shape(i)
                                          ] for i in y])
                         print(
                             "*PASSED* [Layers]%s [Mode]%-8s [Input]%-6s [Direction]%-12s [State]%s [Benchmark]%.4f"
                             % (nb_layers, rnn_mode, input_mode,
                                direction_mode, init_state_name, benchmark))
Example #17
                swap_memory=False, infer_shape=True,
                name=name)
    # return format consistent with theano
    if nb_outputs == 1:
        outputs = outputs[0]
    return outputs


# ====== simulate data ====== #
def doit(_, x, y, z):
    z += K.sum(x + y) + K.sum(K.pow(_, 2))
    return z

sequences = [
    K.placeholder(shape=(600, None)),
    K.variable(np.arange(0, 1200).reshape(-1, 2)),
    K.variable(np.arange(1200, 2400).reshape(-1, 2))
]

outputs_info = K.zeros(shape=(1200,))

X = np.random.rand(600, 3000)
# ====== tf.scan ====== #
y = Scan2(doit,
          sequences=sequences,
          outputs_info=outputs_info,
          n_steps=None,
          backwards=True,
          name=None)
print('Scan:')
with utils.UnitTimer():
Example #18
                      name=name)
    # return format consistent with theano
    if nb_outputs == 1:
        outputs = outputs[0]
    return outputs


# ====== simulate data ====== #
def doit(_, x, y, z):
    z += K.sum(x + y) + K.sum(K.pow(_, 2))
    return z


sequences = [
    K.placeholder(shape=(600, None)),
    K.variable(np.arange(0, 1200).reshape(-1, 2)),
    K.variable(np.arange(1200, 2400).reshape(-1, 2))
]

outputs_info = K.zeros(shape=(1200, ))

X = np.random.rand(600, 3000)
# ====== tf.scan ====== #
y = Scan2(doit,
          sequences=sequences,
          outputs_info=outputs_info,
          n_steps=None,
          backwards=True,
          name=None)
print('Scan:')
with utils.UnitTimer():
Example #19
File: base.py  Project: liqin123/odin
    def create_params(self, spec, shape, name, nnops, roles=[], nb_params=1):
        """
        Parameters
        ----------
        spec: variable, numpy.ndarray, function
            specification for initializing the weights
        shape: tuple, list
            expected shape for given variable
        name: str
            name for the variable
        nnops: NNOps
            parent operator of these parameters
        roles: odin.basic.VariableRole
            categories of this variable
        nb_params: int
            number of parameters horizontally stacked into the given
            `shape` (e.g. nb_params=2 creates 2 parameters with the given
            `shape` and horizontally stacks them into 1 parameter)
            * NOT supported when `spec` is a variable.
        """
        if not isinstance(roles, (tuple, list)):
            roles = [roles]
        if not isinstance(nnops, NNOps):
            raise Exception('nnops must be instance of odin.nnet.base.NNOps')

        shape = tuple(shape)  # convert to tuple if needed
        if any(d <= 0 for d in shape):
            raise ValueError(
                ("Cannot create param with a non-positive shape dimension. "
                 "Tried to create param with shape=%r, name=%r") %
                (shape, name))

        # ====== create parameters ====== #
        spec = as_tuple(spec, nb_params)
        spec = [_initialize_param(name, s, shape) for s in spec]
        # check shape returned
        shape = list(set([i[-1] for i in spec]))
        if len(shape) > 1:
            raise Exception(
                'shapes are inconsistent among all given "spec", the '
                'created shape is: %s' % str(shape))
        shape = shape[0]
        # check spec returned
        spec = [i[0] for i in spec]
        if isinstance(spec[0], np.ndarray):
            with K.variable_scope(nnops.name):
                spec = np.concatenate(spec, axis=-1)
                shape = spec.shape
                spec = K.variable(spec, name=name)
        elif K.is_trainable_variable(spec[0]):
            if nb_params > 1:
                with K.variable_scope(nnops.name):
                    spec = np.concatenate([K.get_value(i) for i in spec],
                                          axis=-1)
                    shape = spec.shape
                    spec = K.variable(spec, name=name)
            else:
                spec = spec[0]
        elif K.is_variable(spec[0]):
            shape = (shape[0] * nb_params,) if len(shape) == 1 \
                else shape[:-1] + (shape[-1] * nb_params,)
            spec = K.concatenate(spec, axis=-1)
        # ====== assign annotations ====== #
        # only add role for trainable variables
        for i in roles:
            if isinstance(i, VariableRole) and K.is_trainable_variable(spec):
                add_role(spec, i)
        # return actual variable or expression
        # override other parameters with same name
        self._variables[name] = spec
        # set parameter attribute for NNOps
        setattr(nnops, name, spec)
        return spec
Example #20
 def test_cudnn_rnn(self):
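     # exercise odin's cudnn RNN parameter utilities (init_rnn, params_to_cudnn,
     # sort_cudnn_params) and check them against TensorFlow's cudnn_rnn_ops reference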
     if get_ngpu() == 0:
         return
     print()
     batch_size = 2
     time_steps = 5
     input_dim = 12
     hidden_dim = 8
     X = K.variable(value=np.random.rand(batch_size, time_steps, input_dim),
                    dtype='float32',
                    name='X')
     for rnn_mode in ('lstm', 'rnn_relu', 'gru'):
         for num_layers in [1, 2]:
             for W_init in [
                     init_ops.glorot_uniform_initializer(seed=1234),
                     init_ops.random_normal_initializer(seed=1234)
             ]:
                 for b_init in [0, 1]:
                     for bidirectional in (True, False):
                         for skip_input in (False, ):
                             print('RNNmode:%s' % rnn_mode,
                                   "#Layers:%d" % num_layers,
                                   'Bidirectional:%s' % bidirectional,
                                   'SkipInput:%s' % skip_input)
                             weights, biases = K.init_rnn(
                                 input_dim=input_dim,
                                 hidden_dim=hidden_dim,
                                 num_gates=rnn_mode,
                                 num_layers=num_layers,
                                 W_init=W_init,
                                 b_init=b_init,
                                 skip_input=skip_input,
                                 cudnn_vector=False,
                                 is_bidirectional=bidirectional,
                                 name=None)
                             # ====== check number of params ====== #
                             params1 = K.params_to_cudnn(weights, biases)
                             n = params1.shape[0].value
                             nb_params = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
                                 rnn_mode=rnn_mode,
                                 num_layers=num_layers,
                                 num_units=hidden_dim,
                                 input_size=input_dim,
                                 input_mode='skip_input'
                                 if skip_input else 'linear_input',
                                 direction='bidirectional'
                                 if bidirectional else 'unidirectional')
                             nb_params = K.eval(nb_params)
                             assert n == nb_params
                              # ====== check canonical shape match ====== #
                             kwargs = {
                                 'num_layers':
                                 num_layers,
                                 'num_units':
                                 hidden_dim,
                                 'input_mode':
                                 'skip_input'
                                 if skip_input else 'linear_input',
                                 'direction':
                                 'bidirectional'
                                 if bidirectional else 'unidirectional'
                             }
                             if rnn_mode == 'lstm':
                                 rnn = cudnn_rnn.CudnnLSTM(**kwargs)
                             elif rnn_mode == 'gru':
                                 rnn = cudnn_rnn.CudnnGRU(**kwargs)
                             if rnn_mode == 'rnn_relu':
                                 rnn = cudnn_rnn.CudnnRNNRelu(**kwargs)
                             if rnn_mode == 'rnn_tanh':
                                 rnn = cudnn_rnn.CudnnRNNTanh(**kwargs)
                             rnn.build(input_shape=(None, None, input_dim))
                             assert len(weights) == len(
                                 rnn.canonical_weight_shapes)
                             assert len(biases) == len(
                                 rnn.canonical_bias_shapes)
                             for w, s in zip(weights,
                                             rnn.canonical_weight_shapes):
                                 assert tuple(w.shape.as_list()) == s
                             # ====== check params conversion ====== #
                             K.initialize_all_variables()
                             params2 = cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
                                 rnn_mode=rnn_mode,
                                 num_layers=num_layers,
                                 num_units=hidden_dim,
                                 input_size=input_dim,
                                 input_mode='skip_input'
                                 if skip_input else 'linear_input',
                                 direction='bidirectional'
                                 if bidirectional else 'unidirectional',
                                 weights=weights,
                                 biases=biases)
                             assert np.all(
                                 K.eval(params1) == K.eval(params2))
                             # ====== odin cudnn implementation ====== #
                             name = 'TEST' + uuid(length=25)
                             outputs = K.cudnn_rnn(
                                 X=X,
                                 num_units=hidden_dim,
                                 rnn_mode=rnn_mode,
                                 num_layers=num_layers,
                                 parameters=None,
                                 skip_input=skip_input,
                                 is_bidirectional=bidirectional,
                                 dropout=0.1,
                                 name=name)
                             K.initialize_all_variables()
                             s0 = K.eval(outputs[0]).sum()
                             s1 = K.eval(outputs[1]).sum()
                             all_variables = K.get_all_variables(scope=name)
                             new_weights = [
                                 i for i in all_variables
                                 if K.role.has_roles(i, roles=K.role.Weight)
                             ]
                             new_biases = [
                                 i for i in all_variables
                                 if K.role.has_roles(i, roles=K.role.Bias)
                             ]
                             new_weights, new_biases = K.sort_cudnn_params(
                                 new_weights, new_biases, rnn_mode=rnn_mode)
                              assert len(weights) == len(new_weights)
                              assert len(biases) == len(new_biases)
                             for i, j in zip(weights + biases,
                                             new_weights + new_biases):
                                 assert i.name.split(
                                     '/')[-1] == j.name.split('/')[-1]
                             # ====== CudnnRNN wrapper ====== #
                             rnn = N.CudnnRNN(
                                 num_units=hidden_dim,
                                 W_init=new_weights,
                                 b_init=new_biases,
                                 rnn_mode=rnn_mode,
                                 num_layers=num_layers,
                                 skip_input=skip_input,
                                 is_bidirectional=bidirectional,
                                 return_states=True,
                                 dropout=0.)
                             outputs = rnn(X)
                             K.initialize_all_variables()
                             y0 = K.eval(outputs[0]).sum()
                             y1 = K.eval(outputs[1]).sum()
                             assert y0 == s0
                             assert y1 == s1
Example #21
    def test_attention_models(self):
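        # build every combination of attention hyper-parameters, then run each model
        # with the default, hard and relaxed alignment methods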
        with bk.framework_('tf'):
            query = bk.variable(np.random.rand(n, Tq, dim).astype('float32'),
                                trainable=True)
            key = bk.variable(np.random.rand(n, Tv, dim).astype('float32'),
                              trainable=True)
            value = bk.variable(np.random.rand(n, Tv, dim).astype('float32'),
                                trainable=True)
            q_mask = np.random.randint(0, 2, size=(n, Tq)).astype('int32')
            v_mask = np.random.randint(0, 2, size=(n, Tv)).astype('int32')

        all_kw = []
        for causal in (True, False):
            for residual in (True, False):
                for dropout in (0.0, 0.3):
                    for temporal_dropout in (True, False):
                        for heads in [
                                dict(num_heads=0,
                                     heads_depth=1,
                                     heads_bias=True,
                                     heads_regularization=0.5,
                                     heads_activation='linear'),
                                dict(num_heads=5,
                                     heads_depth=2,
                                     heads_bias=True,
                                     heads_regularization=0.5,
                                     heads_activation='linear')
                        ]:
                            for scales in [
                                    dict(scale_initializer='vaswani',
                                         scale_tied=True,
                                         scale_trainable=False),
                                    dict(
                                        scale_initializer='ones',
                                        scale_tied=False,
                                        scale_trainable=True,
                                    )
                            ]:
                                for hards in [
                                        dict(
                                            sample_shape=1,
                                            temperature=0.5,
                                            temperature_trainable=False,
                                        ),
                                        dict(
                                            sample_shape=5,
                                            temperature=1.0,
                                            temperature_trainable=True,
                                        )
                                ]:
                                    kw = dict(
                                        causal=causal,
                                        residual=residual,
                                        dropout=dropout,
                                        temporal_dropout=temporal_dropout)
                                    kw.update(heads)
                                    kw.update(scales)
                                    kw.update(hards)
                                    all_kw.append(kw)

        for kw in tqdm(all_kw):
            att = net.SelfAttention(dim, **kw)
            y, a = att(query, mask=(q_mask, v_mask), return_attention=True)
            att.set_methods(alignment=net.attention_mechanism.AlignHard)
            y, a = att(query, mask=(q_mask, v_mask), return_attention=True)
            att.set_methods(alignment=net.attention_mechanism.AlignRelax)
            y, a = att(query, mask=(q_mask, v_mask), return_attention=True)

            att = net.LocalPredictiveAttention(dim, **kw)
            y, a = att([query, value],
                       mask=(q_mask, v_mask),
                       return_attention=True)
            att.set_methods(alignment=net.attention_mechanism.AlignHard)
            y, a = att([query, value],
                       mask=(q_mask, v_mask),
                       return_attention=True)
            att.set_methods(alignment=net.attention_mechanism.AlignRelax)
            y, a = att([query, value],
                       mask=(q_mask, v_mask),
                       return_attention=True)

            att = net.GlobalAttention(dim, **kw)
            y, a = att([query, value],
                       mask=(q_mask, v_mask),
                       return_attention=True)
            att.set_methods(alignment=net.attention_mechanism.AlignHard)
            y, a = att([query, value],
                       mask=(q_mask, v_mask),
                       return_attention=True)
            att.set_methods(alignment=net.attention_mechanism.AlignRelax)
            y, a = att([query, value],
                       mask=(q_mask, v_mask),
                       return_attention=True)
Example #22
    def test_attention(self):
        with bk.framework_('tf'):
            query = bk.variable(np.random.rand(n, Tq, dim).astype('float32'),
                                trainable=True)
            key = bk.variable(np.random.rand(n, Tv, dim).astype('float32'),
                              trainable=True)
            value = bk.variable(np.random.rand(n, Tv, dim).astype('float32'),
                                trainable=True)
            q_mask = np.random.randint(0, 2, size=(n, Tq)).astype('int32')
            v_mask = np.random.randint(0, 2, size=(n, Tv)).astype('int32')

            proj_1 = bk.nn.Dense(1)
            proj_D = bk.nn.Dense(dim)
            proj_V = bk.nn.Dense(1)
            scale = [1. / np.sqrt(dim)] * dim

            num_heads = 2
            q_heads = create_attention_heads(input_dim=query.shape[-1],
                                             num_heads=num_heads,
                                             depth=2)
            k_heads = create_attention_heads(input_dim=key.shape[-1],
                                             num_heads=num_heads,
                                             depth=2)
            v_heads = create_attention_heads(input_dim=value.shape[-1],
                                             num_heads=num_heads,
                                             depth=2)

            for heads in [[q_heads, k_heads, v_heads], [None, None, None]]:
                for input_method in (Inter, Intra):
                    print()
                    for position in (PosLocalM, PosLocalP, PosGlobal):
                        for align_method in (AlignRelax, AlignHard, AlignSoft):
                            for score_method in (ScoreLocation, ScoreAdditive,
                                                 ScoreDotProd, ScoreCosine,
                                                 ScoreGeneral):
                                am = align_method | score_method | input_method | position
                                am.validate()
                                print(am)
                                try:
                                    q, k, v, qm, vm = am.prepare(
                                        query, key, value, (q_mask, v_mask))
                                    q, k, v = [
                                        i if i is None or j is None else j(i)
                                        for i, j in zip([q, k, v], heads)
                                    ]
                                    with bk.GradientTape() as tape:
                                        scores = am.score(
                                            q,
                                            k,
                                            scale=scale,
                                            window_width=None,
                                            q_proj=proj_1
                                            if ScoreLocation in am else proj_D,
                                            target_proj=proj_V)
                                        P = am.normalize(scores)
                                        out, dist = am.align(
                                            scores,
                                            q if v is None else v,
                                            query=q,
                                            v_mask=vm,
                                            q_mask=qm,
                                            causal=True,
                                            residual=True,
                                            dropout=0.3,
                                            training=True,
                                            sample_shape=2)
                                        grads = bk.grad(out,
                                                        [query, key, value],
                                                        tape=tape)
                                    # for name, x, g in zip(["Query", "Key", "Value"], [q, k, v],
                                    #                       grads):
                                    #   print(" %s" % name)
                                    #   print("  -", None if x is None else x.shape)
                                    #   print("  -", None if g is None else
                                    #         (g.shape, bk.norm(g).numpy()))
                                    # print(" Output:", out.shape)
                                    # print(" Attention Scores:", scores.shape)
                                    # print(" Attention Dist  :",
                                    #       dist if isinstance(dist, bay.Distribution) else dist.shape)
                                except NotImplementedError as e:
                                    print("no support!", e)
Example #23
 def __init__(self,
              input_dim,
              causal=False,
              residual=True,
              dropout=0,
              temporal_dropout=False,
              num_heads=0,
              heads_depth=1,
              heads_bias=True,
              heads_regularization=0.,
              heads_activation='linear',
              scale_initializer='vaswani',
              scale_tied=True,
              scale_trainable=False,
              sample_shape=1,
              temperature=0.5,
              temperature_trainable=False,
              name=None):
     super(Attention, self).__init__(name=name)
     self.input_dim = input_dim
     self.causal = bool(causal)
     self.residual = bool(residual)
     # ====== for dropout ====== #
     self.dropout = dropout
     self.temporal_dropout = bool(temporal_dropout)
     # ====== for hard attention ====== #
     self.sample_shape = int(sample_shape)
     self.temperature_trainable = temperature_trainable
     self.temperature = bk.variable(initial_value=temperature,
                                    trainable=temperature_trainable,
                                    dtype='float32',
                                    framework=self)
     # ====== multi-head ====== #
     self.num_heads = int(num_heads)
     self.heads_regularization = heads_regularization
     self.heads_depth = int(heads_depth)
     self.heads_bias = as_tuple(heads_bias, N=self.heads_depth, t=bool)
     self.heads_activation = as_tuple(heads_activation, N=self.heads_depth)
     # ====== initialize scale ====== #
     self.scale_initializer = scale_initializer
     self.scale_tied = scale_tied
     self.scale_trainable = scale_trainable
     if not scale_tied and input_dim is None:
         raise ValueError(
             "If scale_tied=False, the input_dim must be provided.")
     scale = 1
     if scale_initializer is not None:
         if isinstance(scale_initializer, string_types):
             scale_initializer = scale_initializer.lower().strip()
             if scale_initializer == 'vaswani':
                 assert input_dim is not None, \
                   "input_dim must be provided if scale_initializer='vaswani'"
                 scale_initializer = 1 / input_dim**0.5
         scale = bk.parse_initializer(scale_initializer, self)
         if scale_tied:
             scale = bk.variable(initial_value=scale(()),
                                 trainable=scale_trainable,
                                 framework=self)
         else:
             scale = bk.variable(initial_value=scale(
                 nest.flatten(input_dim)),
                                 trainable=scale_trainable,
                                 framework=self)
     self.scale = scale
     # ====== init parameters and layers ====== #
     with bk.framework_(self):
         self.query_heads = create_attention_heads(
             input_dim,
             num_heads=self.num_heads,
             depth=self.heads_depth,
             use_bias=self.heads_bias,
             activation=self.heads_activation)
         self.key_heads = create_attention_heads(
             input_dim,
             num_heads=self.num_heads,
             depth=self.heads_depth,
             use_bias=self.heads_bias,
             activation=self.heads_activation)
         self.value_heads = create_attention_heads(
             input_dim,
             num_heads=self.num_heads,
             depth=self.heads_depth,
             use_bias=self.heads_bias,
             activation=self.heads_activation)
         # init default object
         self._mechanism = Inter | PosGlobal | AlignSoft | ScoreLocation
         # query projection for location-based scoring method
         self.location_proj = None
         # target projection use in Local Predictive attention
         self.target_proj = None
         #
         self._local_init()
         self.set_methods()
Example #24
    def test_variable_creation(self):
        np.random.seed(5218)
        # ====== create by numpy array ====== #
        tmp = np.random.rand(12, 8).astype('float32')
        K.variable(value=tmp, dtype='float32', name='x', initialize=True)
        self.assertTrue(np.all(K.eval(K.variable(name='x')) == tmp))
        # ====== create by Variable name ====== #
        K.variable(value='x', name='z', initialize=True)
        self.assertTrue(np.all(K.eval(K.variable(name='z')) == tmp))

        # ====== create by function ====== #

        def fn(shape):
            return np.full(shape=shape, fill_value=8)

        y = K.variable(value=fn,
                       shape=(12, 18),
                       dtype='float32',
                       name='y',
                       initialize=True)
        self.assertTrue(
            np.all(K.eval(y) == np.full(shape=(12, 18), fill_value=8)))
        # ====== create by initializer ====== #
        tmp = K.eval(init_ops.orthogonal_initializer(seed=5218)(shape=(8, 8)))
        w = K.variable(value=init_ops.orthogonal_initializer(seed=5218),
                       shape=(8, 8),
                       dtype='float32',
                       name='w',
                       initialize=True)
        self.assertTrue(np.all(K.eval(w) == tmp))
        # ====== create by number ====== #
        K.variable(value=25,
                   shape=(8, 8),
                   dtype='float32',
                   name='a',
                   initialize=True)
        self.assertTrue(K.eval(K.variable(name='a')).sum() == 25 * 8 * 8)
        # ====== create by tensor ====== #
        t = tf.constant(value=3,
                        shape=(12, 8),
                        dtype='float32',
                        name='dummy_constant')
        K.variable(value=t, name='b', initialize=True)
        self.assertTrue(np.all(K.eval(K.variable(name='b')) == K.eval(t)))
        # ====== create by Tensor name ====== #
        K.variable(value='dummy_constant', name='c', initialize=True)
        self.assertTrue(np.all(K.eval(K.variable(name='c')) == K.eval(t)))
        # ====== check all variable exist ====== #
        all_variables = []
        all_variables_name = ['x', 'z', 'y', 'w', 'a', 'b', 'c']
        for name in all_variables_name:
            v = K.get_all_variables(name=name)
            assert len(v) == 1, name
            all_variables.append(v[0])
        # check no duplicate variables
        self.assertTrue(len(set(all_variables)) == len(all_variables_name))