def InitOpaqueParams(self, dtype, base_initializer):
  """Uses base_initializer to initialize the weights of the opaque cudnn params.

  Args:
    dtype: data type.
    base_initializer: a callable that returns a tensor given shape, dtype and
      partition_info.

  Returns:
    An initialized opaque cudnn params blob. Its weights are initialized with
    base_initializer, and its biases are set to zero.
  """
  # The partition_info argument is not used.
  weights = [
      base_initializer(sp, dtype, partition_info=None)
      for sp in self.weight_shapes
  ]
  biases = [tf.zeros(sp, dtype=dtype) for sp in self.bias_shapes]
  return cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
      rnn_mode='lstm',
      num_layers=1,
      num_units=self._cell_nodes,
      input_size=self._input_nodes,
      weights=weights,
      biases=biases,
      input_mode='linear_input',
      direction=self._direction)
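# A minimal usage sketch (hypothetical: `layer` stands for an instance of the
# enclosing class, assumed to expose the weight_shapes, bias_shapes,
# _cell_nodes, _input_nodes and _direction members used above):
import tensorflow as tf

def make_opaque_variable(layer):
  # Build the opaque blob, then wrap it in a variable; validate_shape=False
  # is required because the blob has no static shape.
  params = layer.InitOpaqueParams(
      dtype=tf.float32,
      base_initializer=tf.glorot_uniform_initializer(seed=1234))
  return tf.get_variable(
      'opaque_params', initializer=params, validate_shape=False)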
def _canonical_to_opaque(self, cu_weights, cu_biases):
  if not self._input_size:
    raise RuntimeError(
        "%s._canonical_to_opaque invoked before input shape is known" %
        type(self).__name__)
  return cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
      rnn_mode=self._rnn_mode,
      num_layers=self._num_layers,
      num_units=self._num_units,
      input_size=self._input_size,
      weights=cu_weights,
      biases=cu_biases,
      input_mode=self._input_mode,
      direction=self._direction)
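# For reference, a sketch of the inverse conversion under the same assumptions
# as _canonical_to_opaque above (cudnn_rnn_opaque_params_to_canonical is the
# counterpart op in tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops; the
# method name and error message below simply mirror the original's style):
def _opaque_to_canonical(self, opaque_params):
  """Splits an opaque cudnn parameter blob into canonical weights/biases."""
  if not self._input_size:
    raise RuntimeError(
        "%s._opaque_to_canonical invoked before input shape is known" %
        type(self).__name__)
  # Returns (weights, biases), each a list of cudnn-canonical tensors.
  return cudnn_rnn_ops.cudnn_rnn_opaque_params_to_canonical(
      rnn_mode=self._rnn_mode,
      num_layers=self._num_layers,
      num_units=self._num_units,
      input_size=self._input_size,
      params=opaque_params,
      input_mode=self._input_mode,
      direction=self._direction)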
import tensorflow as tf
from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import (
    CudnnLSTMSaveable, cudnn_rnn_canonical_to_opaque_params)
from tensorflow.python.ops import state_ops


def init(params, use_gpu=True, skip=1):
  if use_gpu:
    i = 0
    j = 0
    init_ops = []
    # Hidden sizes and input sizes of the LSTM layers, in graph order.
    units = [64, 96, 96, 512]
    prevs = [16, 64, 96, 96]
    for variable in tf.trainable_variables()[skip:]:
      # Opaque cudnn params are the only trainables with an unknown
      # static shape.
      if 'unknown' in str(variable.get_shape()):
        canonical_w = tf.constant(params[i], dtype=tf.float32)
        canonical_b = tf.constant(params[i + 1], dtype=tf.float32)
        lstm = CudnnLSTMSaveable(
            num_layers=1,
            num_units=units[j],
            input_size=prevs[j],
            opaque_params=variable)
        # Convert TF-canonical kernel/bias to cudnn-canonical layout, then
        # pack them into a single opaque blob.
        canonical_w = lstm._tf_to_cudnn_weights(0, canonical_w)
        canonical_b = lstm._tf_to_cudnn_biases(canonical_b)
        opaque_v = cudnn_rnn_canonical_to_opaque_params(
            'lstm', 1, units[j], prevs[j], canonical_w, canonical_b)
        j += 1
        i += 2
        # The opaque blob's shape is unknown, so skip shape validation.
        init_op = state_ops.assign(variable, opaque_v, validate_shape=False)
        init_ops.append(init_op)
        continue
      init_op = variable.assign(params[i])
      init_ops.append(init_op)
      i += 1
  else:
    init_ops = []
    for i, variable in enumerate(tf.trainable_variables()[skip:]):
      init_op = variable.assign(params[i])
      init_ops.append(init_op)
  return init_ops
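# A self-contained sketch of the per-layer conversion init() performs, with
# arbitrary sizes for illustration (assumes TF 1.x with contrib and a
# CUDA-enabled GPU; the ops only execute when run in a session):
import numpy as np
import tensorflow as tf
from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops

num_units, input_size = 64, 16
# A single LSTM layer in TF-canonical layout: one fused kernel and bias.
kernel = tf.constant(
    np.random.rand(input_size + num_units, 4 * num_units), dtype=tf.float32)
bias = tf.constant(np.random.rand(4 * num_units), dtype=tf.float32)

# validate_shape=False leaves the variable's static shape unknown, which is
# exactly how init() above recognizes opaque cudnn params.
opaque = tf.get_variable(
    'opaque_params', initializer=tf.zeros([1]), validate_shape=False)
saveable = cudnn_rnn_ops.CudnnLSTMSaveable(
    opaque_params=opaque, num_layers=1, num_units=num_units,
    input_size=input_size)
cu_weights = saveable._tf_to_cudnn_weights(0, kernel)
cu_biases = saveable._tf_to_cudnn_biases(bias)
blob = cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
    'lstm', 1, num_units, input_size, cu_weights, cu_biases)
assign_op = tf.assign(opaque, blob, validate_shape=False)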
# Note: this is a method of a test class in odin's test suite. The enclosing
# module is assumed to provide: numpy as np, itertools, odin's backend as K
# and nnet as N, uuid and get_ngpu from odin.utils, and TF 1.x's init_ops,
# cudnn_rnn and cudnn_rnn_ops modules.
def test_cudnn_rnn(self):
  if get_ngpu() == 0:
    return
  print()
  batch_size = 2
  time_steps = 5
  input_dim = 12
  hidden_dim = 8
  X = K.variable(
      value=np.random.rand(batch_size, time_steps, input_dim),
      dtype='float32', name='X')
  W_inits = [
      init_ops.glorot_uniform_initializer(seed=1234),
      init_ops.random_normal_initializer(seed=1234)
  ]
  for (rnn_mode, num_layers, W_init, b_init, bidirectional,
       skip_input) in itertools.product(('lstm', 'rnn_relu', 'gru'), [1, 2],
                                        W_inits, [0, 1], (True, False),
                                        (False,)):
    print('RNNmode:%s' % rnn_mode, '#Layers:%d' % num_layers,
          'Bidirectional:%s' % bidirectional, 'SkipInput:%s' % skip_input)
    input_mode = 'skip_input' if skip_input else 'linear_input'
    direction = 'bidirectional' if bidirectional else 'unidirectional'
    weights, biases = K.init_rnn(
        input_dim=input_dim, hidden_dim=hidden_dim, num_gates=rnn_mode,
        num_layers=num_layers, W_init=W_init, b_init=b_init,
        skip_input=skip_input, cudnn_vector=False,
        is_bidirectional=bidirectional, name=None)
    # ====== check number of params ====== #
    params1 = K.params_to_cudnn(weights, biases)
    n = params1.shape[0].value
    nb_params = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
        rnn_mode=rnn_mode, num_layers=num_layers, num_units=hidden_dim,
        input_size=input_dim, input_mode=input_mode, direction=direction)
    nb_params = K.eval(nb_params)
    assert n == nb_params
    # ====== check canonical shapes match ====== #
    kwargs = {
        'num_layers': num_layers,
        'num_units': hidden_dim,
        'input_mode': input_mode,
        'direction': direction,
    }
    if rnn_mode == 'lstm':
      rnn = cudnn_rnn.CudnnLSTM(**kwargs)
    elif rnn_mode == 'gru':
      rnn = cudnn_rnn.CudnnGRU(**kwargs)
    elif rnn_mode == 'rnn_relu':
      rnn = cudnn_rnn.CudnnRNNRelu(**kwargs)
    elif rnn_mode == 'rnn_tanh':
      rnn = cudnn_rnn.CudnnRNNTanh(**kwargs)
    rnn.build(input_shape=(None, None, input_dim))
    assert len(weights) == len(rnn.canonical_weight_shapes)
    assert len(biases) == len(rnn.canonical_bias_shapes)
    for w, s in zip(weights, rnn.canonical_weight_shapes):
      assert tuple(w.shape.as_list()) == s
    # ====== check params conversion ====== #
    K.initialize_all_variables()
    params2 = cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
        rnn_mode=rnn_mode, num_layers=num_layers, num_units=hidden_dim,
        input_size=input_dim, input_mode=input_mode, direction=direction,
        weights=weights, biases=biases)
    assert np.all(K.eval(params1) == K.eval(params2))
    # ====== odin cudnn implementation ====== #
    name = 'TEST' + uuid(length=25)
    outputs = K.cudnn_rnn(
        X=X, num_units=hidden_dim, rnn_mode=rnn_mode, num_layers=num_layers,
        parameters=None, skip_input=skip_input,
        is_bidirectional=bidirectional, dropout=0.1, name=name)
    K.initialize_all_variables()
    s0 = K.eval(outputs[0]).sum()
    s1 = K.eval(outputs[1]).sum()
    all_variables = K.get_all_variables(scope=name)
    new_weights = [
        i for i in all_variables if K.role.has_roles(i, roles=K.role.Weight)
    ]
    new_biases = [
        i for i in all_variables if K.role.has_roles(i, roles=K.role.Bias)
    ]
    new_weights, new_biases = K.sort_cudnn_params(
        new_weights, new_biases, rnn_mode=rnn_mode)
    assert len(weights) == len(new_weights)
    assert len(biases) == len(new_biases)
    for i, j in zip(weights + biases, new_weights + new_biases):
      assert i.name.split('/')[-1] == j.name.split('/')[-1]
    # ====== CudnnRNN wrapper ====== #
    rnn = N.CudnnRNN(
        num_units=hidden_dim, W_init=new_weights, b_init=new_biases,
        rnn_mode=rnn_mode, num_layers=num_layers, skip_input=skip_input,
        is_bidirectional=bidirectional, return_states=True, dropout=0.)
    outputs = rnn(X)
    K.initialize_all_variables()
    y0 = K.eval(outputs[0]).sum()
    y1 = K.eval(outputs[1]).sum()
    assert y0 == s0
    assert y1 == s1
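# A compact, standalone round-trip check distilled from the test above
# (assumes TF 1.x contrib and a CUDA-enabled GPU; a unidirectional
# single-layer LSTM with linear input, the defaults of both ops):
import tensorflow as tf
from tensorflow.contrib import cudnn_rnn
from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops

num_units, input_dim = 8, 12
rnn = cudnn_rnn.CudnnLSTM(num_layers=1, num_units=num_units)
rnn.build(input_shape=(None, None, input_dim))

# Dummy canonical parameters with the shapes the layer reports.
weights = [tf.ones(s) for s in rnn.canonical_weight_shapes]
biases = [tf.zeros(s) for s in rnn.canonical_bias_shapes]
opaque = cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
    rnn_mode='lstm', num_layers=1, num_units=num_units,
    input_size=input_dim, weights=weights, biases=biases)
size = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
    rnn_mode='lstm', num_layers=1, num_units=num_units,
    input_size=input_dim)

with tf.Session() as sess:
  blob, n = sess.run([opaque, size])
  # The packed blob must have exactly the size cudnn reports.
  assert blob.shape[0] == n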