Example #1
    def InitOpaqueParams(self, dtype, base_initializer):
        """Uses base_initializer to init weights from opaque cudnn params.

    Args:
      dtype: data type.
      base_initializer: a callable that returns a tensor given shape, dtype and
          partition_info.
    Returns:
      A initialized opaque cudnn params. Its weights are initialized with the
      base_initializer, and biases are set to zero.
    """
        # partition_info is not used by the initializer, so pass None.
        weights = [
            base_initializer(sp, dtype, partition_info=None)
            for sp in self.weight_shapes
        ]
        biases = [tf.zeros(sp, dtype=dtype) for sp in self.bias_shapes]
        return cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
            rnn_mode='lstm',
            num_layers=1,
            num_units=self._cell_nodes,
            input_size=self._input_nodes,
            weights=weights,
            biases=biases,
            input_mode='linear_input',
            direction=self._direction)
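A minimal sketch of the same conversion outside the class, assuming a single-layer unidirectional LSTM; the sizes and shape tables below are illustrative, and the op itself needs a CUDA-enabled TensorFlow 1.x build with a GPU to execute:

import tensorflow as tf
from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops

num_units, input_size = 8, 12
# Layer 0 of a unidirectional LSTM has 4 input and 4 recurrent weight
# matrices, plus 8 bias vectors, in cudnn's canonical layout.
weight_shapes = [(num_units, input_size)] * 4 + [(num_units, num_units)] * 4
bias_shapes = [(num_units,)] * 8

init = tf.glorot_uniform_initializer(seed=0)
weights = [init(sp, tf.float32) for sp in weight_shapes]
biases = [tf.zeros(sp, dtype=tf.float32) for sp in bias_shapes]

opaque = cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
    rnn_mode='lstm',
    num_layers=1,
    num_units=num_units,
    input_size=input_size,
    weights=weights,
    biases=biases,
    input_mode='linear_input',
    direction='unidirectional')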
Example #2
 def _canonical_to_opaque(self, cu_weights, cu_biases):
     if not self._input_size:
         raise RuntimeError(
             "%s._canonical_to_opaque invoked before input shape is known" %
             type(self).__name__)
     return cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
         rnn_mode=self._rnn_mode,
         num_layers=self._num_layers,
         num_units=self._num_units,
         input_size=self._input_size,
         weights=cu_weights,
         biases=cu_biases,
         input_mode=self._input_mode,
         direction=self._direction)
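Once the input size is known, the expected size of the opaque buffer can be queried up front with the companion cudnn_rnn_opaque_params_size op from the same module. A sketch with illustrative sizes:

import tensorflow as tf
from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops

# Number of scalars the opaque buffer will hold for this configuration.
size_t = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
    rnn_mode='lstm',
    num_layers=2,
    num_units=64,
    input_size=32,
    input_mode='linear_input',
    direction='unidirectional')

with tf.Session() as sess:
    print(sess.run(size_t))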
Example #3
import tensorflow as tf
from tensorflow.python.ops import state_ops
# Assumed imports (TF 1.x contrib); the snippet uses these names unqualified.
from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import (
    CudnnLSTMSaveable, cudnn_rnn_canonical_to_opaque_params)


def init(params, use_gpu=True, skip=1):
    if use_gpu:
        i = 0  # index into the flat `params` list
        j = 0  # index into the per-layer size tables below
        init_ops = []
        units = [64, 96, 96, 512]  # hidden sizes of the four LSTM layers
        prevs = [16, 64, 96, 96]   # input sizes feeding those layers
        for variable in tf.trainable_variables()[skip:]:
            # Opaque cudnn parameter variables have an unknown static shape.
            if 'unknown' in str(variable.get_shape()):
                canonical_w = tf.constant(params[i], dtype=tf.float32)
                canonical_b = tf.constant(params[i + 1], dtype=tf.float32)
                lstm = CudnnLSTMSaveable(num_layers=1,
                                         num_units=units[j],
                                         input_size=prevs[j],
                                         opaque_params=variable)
                # Convert TF-layout weights/biases to cudnn's canonical
                # layout, then pack them into one opaque buffer.
                canonical_w = lstm._tf_to_cudnn_weights(0, canonical_w)
                canonical_b = lstm._tf_to_cudnn_biases(canonical_b)
                opaque_v = cudnn_rnn_canonical_to_opaque_params(
                    'lstm', 1, units[j], prevs[j], canonical_w, canonical_b)
                j += 1
                i += 2
                # The opaque buffer's shape differs from the variable's
                # declared shape, so skip shape validation on assign.
                init_op = state_ops.assign(variable,
                                           opaque_v,
                                           validate_shape=False)
                init_ops.append(init_op)
                continue

            init_op = variable.assign(params[i])
            init_ops.append(init_op)
            i += 1
    else:
        init_ops = []
        for i, variable in enumerate(tf.trainable_variables()[skip:]):
            init_op = variable.assign(params[i])
            init_ops.append(init_op)
    return init_ops
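To sanity-check an assignment like the one above, the conversion can be reversed with cudnn_rnn_opaque_params_to_canonical from the same module. A sketch, reusing the illustrative sizes of the first layer (units[0]=64, prevs[0]=16) and the `opaque_v` buffer from the loop above:

from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import (
    cudnn_rnn_opaque_params_to_canonical)

# Unpack an opaque buffer back into canonical weight and bias tensors
# for inspection.
ws, bs = cudnn_rnn_opaque_params_to_canonical(
    rnn_mode='lstm',
    num_layers=1,
    num_units=64,
    input_size=16,
    params=opaque_v,
    input_mode='linear_input',
    direction='unidirectional')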
Example #4
 def test_cudnn_rnn(self):
     if get_ngpu() == 0:
         return  # skip the test when no GPU is available
     print()
     batch_size = 2
     time_steps = 5
     input_dim = 12
     hidden_dim = 8
     X = K.variable(value=np.random.rand(batch_size, time_steps, input_dim),
                    dtype='float32',
                    name='X')
     for rnn_mode in ('lstm', 'rnn_relu', 'gru'):
         for num_layers in [1, 2]:
             for W_init in [
                     init_ops.glorot_uniform_initializer(seed=1234),
                     init_ops.random_normal_initializer(seed=1234)
             ]:
                 for b_init in [0, 1]:
                     for bidirectional in (True, False):
                         for skip_input in (False, ):
                             print('RNNmode:%s' % rnn_mode,
                                   "#Layers:%d" % num_layers,
                                   'Bidirectional:%s' % bidirectional,
                                   'SkipInput:%s' % skip_input)
                             weights, biases = K.init_rnn(
                                 input_dim=input_dim,
                                 hidden_dim=hidden_dim,
                                 num_gates=rnn_mode,
                                 num_layers=num_layers,
                                 W_init=W_init,
                                 b_init=b_init,
                                 skip_input=skip_input,
                                 cudnn_vector=False,
                                 is_bidirectional=bidirectional,
                                 name=None)
                             # ====== check number of params ====== #
                             params1 = K.params_to_cudnn(weights, biases)
                             n = params1.shape[0].value
                             nb_params = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
                                 rnn_mode=rnn_mode,
                                 num_layers=num_layers,
                                 num_units=hidden_dim,
                                 input_size=input_dim,
                                 input_mode='skip_input'
                                 if skip_input else 'linear_input',
                                 direction='bidirectional'
                                 if bidirectional else 'unidirectional')
                             nb_params = K.eval(nb_params)
                             assert n == nb_params
                             # ====== check canonical shape match ====== #
                             kwargs = {
                                 'num_layers': num_layers,
                                 'num_units': hidden_dim,
                                 'input_mode': ('skip_input' if skip_input
                                                else 'linear_input'),
                                 'direction': ('bidirectional' if bidirectional
                                               else 'unidirectional'),
                             }
                             if rnn_mode == 'lstm':
                                 rnn = cudnn_rnn.CudnnLSTM(**kwargs)
                             elif rnn_mode == 'gru':
                                 rnn = cudnn_rnn.CudnnGRU(**kwargs)
                             elif rnn_mode == 'rnn_relu':
                                 rnn = cudnn_rnn.CudnnRNNRelu(**kwargs)
                             elif rnn_mode == 'rnn_tanh':
                                 rnn = cudnn_rnn.CudnnRNNTanh(**kwargs)
                             rnn.build(input_shape=(None, None, input_dim))
                             assert len(weights) == len(
                                 rnn.canonical_weight_shapes)
                             assert len(biases) == len(
                                 rnn.canonical_bias_shapes)
                             for w, s in zip(weights,
                                             rnn.canonical_weight_shapes):
                                 assert tuple(w.shape.as_list()) == s
                             # ====== check params conversion ====== #
                             K.initialize_all_variables()
                             params2 = cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
                                 rnn_mode=rnn_mode,
                                 num_layers=num_layers,
                                 num_units=hidden_dim,
                                 input_size=input_dim,
                                 input_mode='skip_input'
                                 if skip_input else 'linear_input',
                                 direction='bidirectional'
                                 if bidirectional else 'unidirectional',
                                 weights=weights,
                                 biases=biases)
                             assert np.all(
                                 K.eval(params1) == K.eval(params2))
                             # ====== odin cudnn implementation ====== #
                             name = 'TEST' + uuid(length=25)
                             outputs = K.cudnn_rnn(
                                 X=X,
                                 num_units=hidden_dim,
                                 rnn_mode=rnn_mode,
                                 num_layers=num_layers,
                                 parameters=None,
                                 skip_input=skip_input,
                                 is_bidirectional=bidirectional,
                                 dropout=0.1,
                                 name=name)
                             K.initialize_all_variables()
                             s0 = K.eval(outputs[0]).sum()
                             s1 = K.eval(outputs[1]).sum()
                             all_variables = K.get_all_variables(scope=name)
                             new_weights = [
                                 i for i in all_variables
                                 if K.role.has_roles(i, roles=K.role.Weight)
                             ]
                             new_biases = [
                                 i for i in all_variables
                                 if K.role.has_roles(i, roles=K.role.Bias)
                             ]
                             new_weights, new_biases = K.sort_cudnn_params(
                                 new_weights, new_biases, rnn_mode=rnn_mode)
                             assert len(new_weights) == len(weights)
                             assert len(new_biases) == len(biases)
                             for i, j in zip(weights + biases,
                                             new_weights + new_biases):
                                 assert i.name.split(
                                     '/')[-1] == j.name.split('/')[-1]
                             # ====== CudnnRNN wrapper ====== #
                             rnn = N.CudnnRNN(
                                 num_units=hidden_dim,
                                 W_init=new_weights,
                                 b_init=new_biases,
                                 rnn_mode=rnn_mode,
                                 num_layers=num_layers,
                                 skip_input=skip_input,
                                 is_bidirectional=bidirectional,
                                 return_states=True,
                                 dropout=0.)
                             outputs = rnn(X)
                             K.initialize_all_variables()
                             y0 = K.eval(outputs[0]).sum()
                             y1 = K.eval(outputs[1]).sum()
                             assert y0 == s0
                             assert y1 == s1
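The exact float equality at the end of the test can be fragile across cudnn versions; a tolerant comparison is often safer. A sketch, assuming numpy is available as np (as it already is in this test):

assert np.isclose(y0, s0, rtol=1e-5), (y0, s0)
assert np.isclose(y1, s1, rtol=1e-5), (y1, s1)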