Example #1
 def _build_training(self):
     trainable_variables = tf.trainable_variables()
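     # distributed run: optionally scale the learning rate, then wrap the
     # optimizer with run_opt's _HVD (Horovod) DistributedOptimizer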
     if self.run_opt.is_distrib:
         if self.scale_lr_coef > 1.:
             log.info('Scale learning rate by coef: %f', self.scale_lr_coef)
             optimizer = tf.train.AdamOptimizer(self.learning_rate *
                                                self.scale_lr_coef)
         else:
             optimizer = tf.train.AdamOptimizer(self.learning_rate)
         optimizer = self.run_opt._HVD.DistributedOptimizer(optimizer)
     else:
         optimizer = tf.train.AdamOptimizer(
             learning_rate=self.learning_rate)
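     # optionally enable the mixed precision graph rewrite; the rewrite API
     # moved between TF releases, so pick the call based on the TF version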
     if self.mixed_prec is not None:
         _TF_VERSION = Version(TF_VERSION)
         # check the TF_VERSION; the mixed precision graph rewrite requires TF >= 1.14
         if _TF_VERSION < Version('1.14.0'):
             raise RuntimeError(
                 "TensorFlow version %s is not compatible with the mixed precision setting. Please consider upgrading your TF version!"
                 % TF_VERSION)
         elif _TF_VERSION < Version('2.4.0'):
             optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(
                 optimizer)
         else:
             optimizer = tf.mixed_precision.enable_mixed_precision_graph_rewrite(
                 optimizer)
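     # minimize the L2 loss over all trainable variables and advance the global step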
     apply_op = optimizer.minimize(loss=self.l2_l,
                                   global_step=self.global_step,
                                   var_list=trainable_variables,
                                   name='train_step')
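     # group the weight update with any extra training ops in self._extra_train_ops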
     train_ops = [apply_op] + self._extra_train_ops
     self.train_op = tf.group(*train_ops)
     log.info("built training")
Example #2
 def _build_training(self):
     trainable_variables = tf.trainable_variables()
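     # distributed run: scale the learning rate linearly by the world size and
     # wrap the optimizer with run_opt's _HVD (Horovod) DistributedOptimizer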
     if self.run_opt.is_distrib:
         optimizer = tf.train.AdamOptimizer(
             learning_rate=self.learning_rate * self.run_opt.world_size)
         optimizer = self.run_opt._HVD.DistributedOptimizer(optimizer)
     else:
         optimizer = tf.train.AdamOptimizer(
             learning_rate=self.learning_rate)
     apply_op = optimizer.minimize(loss=self.l2_l,
                                   global_step=self.global_step,
                                   var_list=trainable_variables,
                                   name='train_step')
     train_ops = [apply_op] + self._extra_train_ops
     self.train_op = tf.group(*train_ops)
     log.info("built training")
Example #3
 def _build_training(self):
     trainable_variables = tf.trainable_variables()
     optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
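     # synchronous data parallelism: aggregate gradients from every worker
     # before applying the update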
     if self.run_opt.is_distrib:
         optimizer = tf.train.SyncReplicasOptimizer(
             optimizer,
             replicas_to_aggregate=self.run_opt.cluster_spec.num_tasks("worker"),
             total_num_replicas=self.run_opt.cluster_spec.num_tasks("worker"),
             name="sync_replicas")
         self.sync_replicas_hook = optimizer.make_session_run_hook(self.run_opt.is_chief)
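     # compute and apply the gradients explicitly (equivalent to minimize),
     # advancing the global step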
     grads = tf.gradients(self.l2_l, trainable_variables)
     apply_op = optimizer.apply_gradients(zip(grads, trainable_variables),
                                          global_step=self.global_step,
                                          name='train_step')
     train_ops = [apply_op] + self._extra_train_ops
     self.train_op = tf.group(*train_ops)
     self._message("built training")