Example #1
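 # NOTE: this is a method excerpt from an O.D.I.N test case; it assumes the
 # imports used in the embedded script below (numpy as np, odin.backend as K,
 # odin.nnet as N, tensorflow.python.ops.init_ops) plus a `run_script` helper
 # available in the surrounding test module.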
 def test_save_cudnn_rnn(self):
   np.random.seed(5218)
   X = K.variable(np.random.rand(25, 12, 8))
   num_layers = 2
   num_gates = 'lstm'
   skip_input = False
   is_bidirectional = False
   path = '/tmp/rnn'
   weights, biases = K.init_rnn(input_dim=8, hidden_dim=18,
                                b_init=init_ops.random_normal_initializer(),
                                num_layers=num_layers, num_gates=num_gates,
                                skip_input=skip_input,
                                is_bidirectional=is_bidirectional)
   rnn = N.CudnnRNN(num_units=18,
                    W_init=weights, b_init=biases,
                    rnn_mode=num_gates, num_layers=num_layers,
                    skip_input=skip_input, is_bidirectional=is_bidirectional,
                    return_states=False,
                    dropout=0., name="CudnnRNNTest")
   y = rnn(X)
   K.initialize_all_variables()
   y = K.eval(y)
   N.serialize(nnops=rnn, path=path, binary_output=True, override=True)
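   # The script below is run in a separate process: it deserializes the saved op,
   # re-evaluates it on the same seeded input, and prints variable/output checksums
   # that are compared against the in-process values by the asserts further down.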
   test_script = r"""
   from __future__ import print_function, division, absolute_import
   import os
   os.environ['ODIN'] = 'gpu,float32,seed=5218'
   import pickle
   import numpy as np
   import tensorflow as tf
   from tensorflow.python.ops import init_ops
   from odin.config import randint
   from odin import backend as K, nnet as N
   np.random.seed(5218)
   X = K.variable(np.random.rand(25, 12, 8))
   rnn = N.deserialize("%s", force_restore_vars=True)
   y = rnn(X)
   K.initialize_all_variables()
   y = K.eval(y)
   print(len(rnn.variables),
         sum(np.sum(K.eval(i)) for i in rnn.variables
                   if K.role.has_roles(i, K.role.Weight)),
         sum(np.sum(K.eval(i)) for i in rnn.variables
             if K.role.has_roles(i, K.role.Bias)),
         y.sum(),
         (y**2).sum())
   """ % path
   outputs = run_script(test_script)[1]
   num_variables, w, b, s1, s2 = outputs.split(' ')
   assert int(num_variables) == len(rnn.variables)
   assert np.allclose(float(w),
                      sum(np.sum(K.eval(i)) for i in rnn.variables
                          if K.role.has_roles(i, K.role.Weight)))
   assert np.allclose(float(b),
                      sum(np.sum(K.eval(i)) for i in rnn.variables
                          if K.role.has_roles(i, K.role.Bias)))
   assert np.allclose(float(s1), y.sum())
   assert np.allclose(float(s2), (y**2).sum())
Example #2
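# NOTE: excerpt from an MNIST-style training script; it assumes the `X`, `y` and
# `y_onehot` placeholders and the `arg` argparse namespace were defined earlier;
# the network definition `ops` is truncated, with only its last two layers shown.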
      N.Dense(128, activation=K.relu),
      N.Dense(10, activation=tf.nn.softmax)
  ], debug=True)
# ====== applying the NNOps ====== #
y_pred = ops(X)
if arg.rnn:
  loss = tf.losses.softmax_cross_entropy(y_onehot, ops(X, training=True))
else:
  loss = tf.losses.softmax_cross_entropy(y_onehot, y_pred)
acc = K.metrics.categorical_accuracy(y, y_pred, name="Acc")
cm = K.metrics.confusion_matrix(y_pred=y_pred, y_true=y, labels=10)
# ====== optimizer ====== #
optimizer = K.optimizers.Adam(lr=0.001)
updates = optimizer.minimize(loss, verbose=True)
# ====== initialize all variables ====== #
K.initialize_all_variables()
# ====== function ====== #
print('Building training functions ...')
f_train = K.function([X, y], [loss, optimizer.norm, cm],
                     updates=updates, training=True)
print('Building testing functions ...')
f_test = K.function([X, y], [loss, acc, cm], training=False)
print('Building predicting functions ...')
f_pred = K.function(X, y_pred, training=False)
# ===========================================================================
# Build trainer
# ===========================================================================
print('Start training ...')
# ====== some configurations ====== #
model_save_path = '/tmp/EXP_MNIST'
if os.path.exists(model_save_path):
Example #3
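# NOTE: excerpt from an autoencoder-style (rate/distortion) script; it assumes the
# `X` placeholder, the decoder outputs `X_logits`/`X_probas`/`X_samples`, the `args`
# namespace, and the `Progbar`/`batching`/`timeit` helpers were defined or imported
# earlier in the script.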
                      dtype=X_probas.dtype))
f_samples = K.function(inputs=[], outputs=X_samples, training=False)
# ====== `distortion` is the negative log likelihood ====== #
if args.loss == 'ce':
  loss = tf.losses.softmax_cross_entropy(onehot_labels=X, logits=X_logits)
elif args.loss == 'mse':
  loss = tf.losses.mean_squared_error(labels=X, predictions=X_probas)
elif args.loss == 'huber':
  loss = tf.losses.huber_loss(labels=X, predictions=X_probas)
elif args.loss == 'lglo':
  loss = tf.losses.log_loss(labels=X, predictions=X_probas)
# ===========================================================================
# Optimizing the network
# ===========================================================================
update_ops = K.optimizers.Adam(lr=0.001).minimize(loss)
K.initialize_all_variables()
# ====== initialize ====== #
record_train_loss = []
record_valid_loss = []
patience = 3
epoch = 0
# We want the rate to go up but the distortion to go down
while True:
  # ====== training ====== #
  train_losses = []
  prog = Progbar(target=X_train.shape[0], name='Epoch%d' % epoch)
  start_time = timeit.default_timer()
  for start, end in batching(batch_size=args.bs, n=X_train.shape[0],
                             seed=K.get_rng().randint(10e8)):
    _ = K.eval(loss, feed_dict={X: X_train[start:end]},
               update_after=update_ops)
Example #4
def train(X,
          y_true,
          y_pred,
          train_data,
          valid_data=None,
          valid_freq=1.,
          patience=3,
          threshold=5,
          rollback=True,
          objectives=[tf.losses.softmax_cross_entropy],
          metrics=[0],
          training_metrics=[],
          l1_regu=0.,
          l2_regu=0.,
          parameters=[],
          prior_weights=None,
          sample_weights=None,
          batch_size=256,
          epochs=8,
          shuffle=True,
          optimizer='rmsprop',
          optz_kwargs={'lr': 0.001},
          updates=None,
          init_vars=True,
          labels=None,
          seed=5218,
          verbose=2):
    """

  Parameters
  ----------
  rollback : bool (default: True)
    if True, allow rolling back to the best checkpoint during training
  objectives : {callable, tensorflow.Tensor}
    if `callable`, the function must take `y_true` and `y_pred`.
    The objectives must be differentiable; they are used for training.
  metrics : {callable, tensorflow.Tensor, int}
    if `callable`, the function must take `y_true` and `y_pred`.
    The `metrics` are used for monitoring the training process.
    if `int`, it is the index of the loss in `objectives`.
    NOTE: the first metric in the list is used for
    early stopping (smaller is better).
  training_metrics : {callable, tensorflow.Tensor, int}
    if `int`, it is the index of the loss in `metrics`
  parameters : {list of tensorflow.Variable}
    all the parameters updated by the `optimizer`; if None or an
    empty list is given, the ComputationGraph of the objectives is
    used to collect all variables with Parameter roles
  init_vars : bool (default: True)
    automatically initialize all variables
  labels : {None, list of string}
    given labels for a classification task
  seed : int
    random seed for reproducibility
  verbose : int
    0 - turn off all logging
    1 - only show notifications
    2 - show notifications, important logs and summary
    3 - show progress, summary, notifications and logging
    4 - show debug information and everything

  Returns
  -------
  Function used for prediction
  """
    from odin import backend as K
    # ====== preprocess inputs ====== #
    X = as_tuple(X, t=K.is_tensor)
    y_true = as_tuple(y_true, t=K.is_tensor)
    y_pred = as_tuple(y_pred, t=K.is_tensor)
    # ====== parsing objectives and metrics ====== #
    # for training
    prior_weights = _preprocess_prior_weights(y_true=y_true,
                                              prior_weights=prior_weights)
    if prior_weights is not None:
        if sample_weights is not None:
            sample_weights = sample_weights + prior_weights
        else:
            sample_weights = prior_weights
    objectives = _preprocessing_losses(as_tuple(objectives),
                                       y_true,
                                       y_pred,
                                       sample_weights=sample_weights)
    # metrics for monitoring
    metrics = as_tuple(metrics)
    get_value = lambda x: np.mean(x)
    if len(metrics) > 0 and \
    (metrics[0] == tf.metrics.accuracy or
     metrics[0] == K.metrics.categorical_accuracy):
        get_value = lambda x: 1 - np.mean(x)
    metrics = _preprocessing_losses(metrics,
                                    y_true,
                                    y_pred,
                                    inherit_losses=objectives)
    # training_metrics
    training_metrics = _preprocessing_losses(as_tuple(training_metrics),
                                             y_true,
                                             y_pred,
                                             inherit_losses=metrics)
    # sum the objectives into a single differentiable loss
    if len(objectives) > 0:
        objectives = [
            sum(objectives) if len(objectives) > 1 else objectives[0]
        ]
    # ====== preprocess optimizer and get updates ====== #
    if updates is None:  # not given updates
        if is_string(optimizer):
            optimizer = _parse_optimizer(optimizer)
            optimizer = optimizer(**optz_kwargs)
        elif not isinstance(optimizer, K.optimizers.Optimizer):
            raise ValueError(
                "`optimizer` must be string - name of algorithm or instance "
                "of odin.backend.optimizers.Optimizer")
        parameters = K.ComputationGraph(objectives).parameters\
        if len(parameters) == 0 else as_tuple(parameters, t=K.is_variable)
        # check objectives
        if len(objectives) == 0:
            raise RuntimeError(
                "`objectives` must be given due to `updates=None`")
        weights = [
            p for p in parameters if K.role.has_roles(p, roles=K.role.Weight)
        ]
        # l1 regularization
        if l1_regu > 0.:
            l1_norm = sum(tf.norm(w, ord=1) for w in weights)
            objectives[0] += l1_norm
        # l2 regularization
        if l2_regu > 0.:
            l2_norm = sum(tf.norm(w, ord=2) for w in weights)
            objectives[0] += l2_norm
        # update rules
        updates = optimizer.get_updates(objectives[0], parameters)
        # adding global norm and learning rate
        training_metrics.append(optimizer.norm)
        training_metrics.append(optimizer.lr)
    elif K.is_operation(updates):  # given updates
        optimizer = None
    else:
        raise ValueError(
            "`updates` can be None or tensorflow Operation, but given "
            "type: %s" % str(type(updates)))
    # ====== placeholders ====== #
    inputs_plh = []
    for plh in X:
        for i in (K.ComputationGraph(plh).placeholders
                  if not K.is_placeholder(plh) else as_tuple(plh)):
            inputs_plh.append(i)
    outputs_plh = []
    for plh in y_true:  # no duplicated inputs (e.g. autoencoder X == y)
        if not K.is_placeholder(plh):
            plh = K.ComputationGraph(plh).placeholders
        for i in as_tuple(plh):
            if i not in inputs_plh:
                outputs_plh.append(i)
    inputs = inputs_plh + outputs_plh
    # ====== initialize variables ====== #
    if bool(init_vars):
        K.initialize_all_variables()
    # ====== creating function ====== #
    # training function
    f_train = K.function(inputs=inputs,
                         outputs=objectives + training_metrics,
                         updates=updates,
                         training=True)
    # scoring function
    f_score = None
    if len(metrics) > 0:
        f_score = K.function(inputs=inputs, outputs=metrics, training=False)
    # prediction function
    f_pred = K.function(inputs=inputs_plh,
                        outputs=y_pred[0] if len(y_pred) == 1 else y_pred,
                        training=False)
    # ====== preprocessing data ====== #
    train_data, valid_data = _preprocessing_data(train_data, valid_data)
    # print some debug information if necessary
    if verbose >= 4:
        print(
            "%s %s %s" %
            (ctext("============", 'cyan'), ctext(
                "Prepare for Training", 'red'), ctext("============", 'cyan')))
        print(ctext("Input placeholders:", 'yellow'))
        for i in inputs_plh:
            print(" * ", str(i))
        print(ctext("Output placeholders:", 'yellow'))
        for i in outputs_plh:
            print(" * ", str(i))
        print(ctext("Parameters:", 'yellow'))
        for p in parameters:
            print(" * ", p.name, '-', p.shape, ';', p.dtype.name)
        print(ctext("Optimizer:", 'yellow'))
        print(" * ", str(optimizer))
        print(" * Optimizer kwargs:", optz_kwargs)
        print(" * L1:", l1_regu)
        print(" * L2:", l2_regu)
        print(ctext("Training:", 'yellow'))
        print(" * Valid freq:", valid_freq)
        print(" * Patience:", patience)
        print(" * Threshold:", threshold)
        print(" * Rollback:", rollback)
        print(" * Batch size:", batch_size)
        print(" * Epoch:", epochs)
        print(" * Shuffle:", shuffle)
        print(" * Seed:", seed)
        print(ctext("Objectives:", 'yellow'))
        for o in objectives:
            print(" * ", str(o))
        print(ctext("Weights:", 'yellow'))
        print(" * Prior:", str(prior_weights))
        print(" * Sample:", str(sample_weights))
        print(ctext("Metrics:", 'yellow'))
        for m in metrics:
            print(" * ", str(m))
        print(ctext("Training metrics:", 'yellow'))
        for t in training_metrics:
            print(" * ", str(t))
        print(ctext("Training Data:", 'yellow'), str(train_data))
        print(ctext("Validating Data:", 'yellow'), str(valid_data))
        print(ctext("Labels:", 'yellow'), labels)
    # ====== create trainer ====== #
    callback_log = verbose > 0
    trainer = MainLoop(batch_size=batch_size,
                       seed=seed if shuffle else None,
                       shuffle_level=2 if shuffle else 0,
                       allow_rollback=rollback,
                       verbose=verbose,
                       labels=labels)
    trainer.set_checkpoint(path=None, obj=None, variables=parameters)
    # create callback
    callbacks = [NaNDetector(patience=patience, log=callback_log)]
    if valid_data is not None and f_score is not None:
        callbacks.append(
            EarlyStopGeneralizationLoss(task_name='valid',
                                        output_name=metrics[0],
                                        threshold=threshold,
                                        patience=patience,
                                        log=callback_log,
                                        get_value=get_value))
    trainer.set_callbacks(callbacks)
    # set the tasks
    trainer.set_train_task(func=f_train,
                           data=train_data,
                           epoch=epochs,
                           name='train')
    if valid_data is not None and f_score is not None:
        trainer.set_valid_task(func=f_score,
                               data=valid_data,
                               freq=Timer(percentage=valid_freq),
                               name='valid')
    # running
    trainer.run()
    return f_pred
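
A minimal, hypothetical usage sketch of `train` follows, assuming a TF1-style graph
and the odin pieces that appear in the other examples on this page (`N.Dense`,
`K.relu`, `tf.losses.softmax_cross_entropy`). The `N.Sequence` container, the
placeholder shapes, the random arrays and the `(X_train, y_train)` pair passed as
`train_data` are illustrative assumptions, not taken from the original code.

import numpy as np
import tensorflow as tf
from odin import backend as K, nnet as N

# hypothetical data: 1000 samples, 784 features, 10 one-hot classes
X_train = np.random.rand(1000, 784).astype('float32')
y_train = np.eye(10)[np.random.randint(0, 10, size=1000)].astype('float32')

# placeholders and a small feed-forward network (`N.Sequence` is an assumption)
X = tf.placeholder(dtype=tf.float32, shape=(None, 784), name='X')
y = tf.placeholder(dtype=tf.float32, shape=(None, 10), name='y')
ops = N.Sequence([N.Dense(256, activation=K.relu),
                  N.Dense(10, activation=tf.nn.softmax)], debug=True)
y_pred = ops(X)

# `train` builds the loss and update rules, runs the MainLoop trainer,
# and returns the prediction function compiled from the input placeholders
f_pred = train(X, y_true=y, y_pred=y_pred,
               train_data=(X_train, y_train),  # data format assumed; see _preprocessing_data
               objectives=[tf.losses.softmax_cross_entropy],
               metrics=[0],                    # monitor the first objective
               optimizer='rmsprop', optz_kwargs={'lr': 0.001},
               batch_size=256, epochs=8, verbose=2)
probs = f_pred(X_train[:16])                   # illustrative prediction call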
Example #5
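 # NOTE: this test checks O.D.I.N's cuDNN RNN utilities (`K.init_rnn`,
 # `K.params_to_cudnn`, `K.cudnn_rnn`, `N.CudnnRNN`) against TensorFlow's cuDNN
 # reference ops; `cudnn_rnn` and `cudnn_rnn_ops` are assumed to be imported from
 # `tensorflow.contrib.cudnn_rnn` (TF 1.x), and `get_ngpu`/`uuid` from odin's utils.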
 def test_cudnn_rnn(self):
     if get_ngpu() == 0:
         return
     print()
     batch_size = 2
     time_steps = 5
     input_dim = 12
     hidden_dim = 8
     X = K.variable(value=np.random.rand(batch_size, time_steps, input_dim),
                    dtype='float32',
                    name='X')
     for rnn_mode in ('lstm', 'rnn_relu', 'gru'):
         for num_layers in [1, 2]:
             for W_init in [
                     init_ops.glorot_uniform_initializer(seed=1234),
                     init_ops.random_normal_initializer(seed=1234)
             ]:
                 for b_init in [0, 1]:
                     for bidirectional in (True, False):
                         for skip_input in (False, ):
                             print('RNNmode:%s' % rnn_mode,
                                   "#Layers:%d" % num_layers,
                                   'Bidirectional:%s' % bidirectional,
                                   'SkipInput:%s' % skip_input)
                             weights, biases = K.init_rnn(
                                 input_dim=input_dim,
                                 hidden_dim=hidden_dim,
                                 num_gates=rnn_mode,
                                 num_layers=num_layers,
                                 W_init=W_init,
                                 b_init=b_init,
                                 skip_input=skip_input,
                                 cudnn_vector=False,
                                 is_bidirectional=bidirectional,
                                 name=None)
                             # ====== check number of params ====== #
                             params1 = K.params_to_cudnn(weights, biases)
                             n = params1.shape[0].value
                             nb_params = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
                                 rnn_mode=rnn_mode,
                                 num_layers=num_layers,
                                 num_units=hidden_dim,
                                 input_size=input_dim,
                                 input_mode='skip_input'
                                 if skip_input else 'linear_input',
                                 direction='bidirectional'
                                 if bidirectional else 'unidirectional')
                             nb_params = K.eval(nb_params)
                             assert n == nb_params
                              # ====== check canonical shape match ====== #
                             kwargs = {
                                 'num_layers':
                                 num_layers,
                                 'num_units':
                                 hidden_dim,
                                 'input_mode':
                                 'skip_input'
                                 if skip_input else 'linear_input',
                                 'direction':
                                 'bidirectional'
                                 if bidirectional else 'unidirectional'
                             }
                             if rnn_mode == 'lstm':
                                 rnn = cudnn_rnn.CudnnLSTM(**kwargs)
                             elif rnn_mode == 'gru':
                                 rnn = cudnn_rnn.CudnnGRU(**kwargs)
                             if rnn_mode == 'rnn_relu':
                                 rnn = cudnn_rnn.CudnnRNNRelu(**kwargs)
                             if rnn_mode == 'rnn_tanh':
                                 rnn = cudnn_rnn.CudnnRNNTanh(**kwargs)
                             rnn.build(input_shape=(None, None, input_dim))
                             assert len(weights) == len(
                                 rnn.canonical_weight_shapes)
                             assert len(biases) == len(
                                 rnn.canonical_bias_shapes)
                             for w, s in zip(weights,
                                             rnn.canonical_weight_shapes):
                                 assert tuple(w.shape.as_list()) == s
                             # ====== check params conversion ====== #
                             K.initialize_all_variables()
                             params2 = cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
                                 rnn_mode=rnn_mode,
                                 num_layers=num_layers,
                                 num_units=hidden_dim,
                                 input_size=input_dim,
                                 input_mode='skip_input'
                                 if skip_input else 'linear_input',
                                 direction='bidirectional'
                                 if bidirectional else 'unidirectional',
                                 weights=weights,
                                 biases=biases)
                             assert np.all(
                                 K.eval(params1) == K.eval(params2))
                             # ====== odin cudnn implementation ====== #
                             name = 'TEST' + uuid(length=25)
                             outputs = K.cudnn_rnn(
                                 X=X,
                                 num_units=hidden_dim,
                                 rnn_mode=rnn_mode,
                                 num_layers=num_layers,
                                 parameters=None,
                                 skip_input=skip_input,
                                 is_bidirectional=bidirectional,
                                 dropout=0.1,
                                 name=name)
                             K.initialize_all_variables()
                             s0 = K.eval(outputs[0]).sum()
                             s1 = K.eval(outputs[1]).sum()
                             all_variables = K.get_all_variables(scope=name)
                             new_weights = [
                                 i for i in all_variables
                                 if K.role.has_roles(i, roles=K.role.Weight)
                             ]
                             new_biases = [
                                 i for i in all_variables
                                 if K.role.has_roles(i, roles=K.role.Bias)
                             ]
                             new_weights, new_biases = K.sort_cudnn_params(
                                 new_weights, new_biases, rnn_mode=rnn_mode)
                              assert len(new_weights) == len(weights)
                              assert len(new_biases) == len(biases)
                             for i, j in zip(weights + biases,
                                             new_weights + new_biases):
                                 assert i.name.split(
                                     '/')[-1] == j.name.split('/')[-1]
                             # ====== CudnnRNN wrapper ====== #
                             rnn = N.CudnnRNN(
                                 num_units=hidden_dim,
                                 W_init=new_weights,
                                 b_init=new_biases,
                                 rnn_mode=rnn_mode,
                                 num_layers=num_layers,
                                 skip_input=skip_input,
                                 is_bidirectional=bidirectional,
                                 return_states=True,
                                 dropout=0.)
                             outputs = rnn(X)
                             K.initialize_all_variables()
                             y0 = K.eval(outputs[0]).sum()
                             y1 = K.eval(outputs[1]).sum()
                             assert y0 == s0
                             assert y1 == s1