Пример #1
0
    def testCreateMulticloneWithPS(self):
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(0)
            tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
            tf_labels = tf.constant(self._labels, dtype=tf.float32)

            model_fn = BatchNormClassifier
            clone_args = (tf_inputs, tf_labels)
            deploy_config = model_deploy.DeploymentConfig(num_clones=2,
                                                          num_ps_tasks=2)

            self.assertEqual(slim.get_variables(), [])
            clones = model_deploy.create_clones(deploy_config, model_fn,
                                                clone_args)
            self.assertEqual(len(slim.get_variables()), 5)
            for i, v in enumerate(slim.get_variables()):
                t = i % 2
                self.assertDeviceEqual(v.device,
                                       '/job:ps/task:%d/device:CPU:0' % t)
                self.assertDeviceEqual(v.device, v.value().device)
            self.assertEqual(len(clones), 2)
            for i, clone in enumerate(clones):
                self.assertEqual(
                    clone.outputs.op.name,
                    'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % i)
                self.assertEqual(clone.scope, 'clone_%d/' % i)
                self.assertDeviceEqual(clone.device,
                                       '/job:worker/device:GPU:%d' % i)
Пример #2
0
    def testCreateOnecloneWithPS(self):
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(0)
            tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
            tf_labels = tf.constant(self._labels, dtype=tf.float32)

            model_fn = BatchNormClassifier
            model_args = (tf_inputs, tf_labels)
            deploy_config = model_deploy.DeploymentConfig(num_clones=1,
                                                          num_ps_tasks=1)

            self.assertEqual(slim.get_variables(), [])
            clones = model_deploy.create_clones(deploy_config, model_fn,
                                                model_args)
            self.assertEqual(len(slim.get_variables()), 5)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            self.assertEqual(len(update_ops), 2)

            optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
            total_loss, grads_and_vars = model_deploy.optimize_clones(
                clones, optimizer)
            self.assertEqual(len(grads_and_vars),
                             len(tf.trainable_variables()))
            self.assertEqual(total_loss.op.name, 'total_loss')
            for g, v in grads_and_vars:
                self.assertDeviceEqual(g.device, '/job:worker/device:GPU:0')
                self.assertDeviceEqual(v.device, '/job:ps/task:0/CPU:0')
Пример #3
0
    def testCreateMulticlone(self):
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(0)
            tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
            tf_labels = tf.constant(self._labels, dtype=tf.float32)

            model_fn = BatchNormClassifier
            clone_args = (tf_inputs, tf_labels)
            num_clones = 4
            deploy_config = model_deploy.DeploymentConfig(
                num_clones=num_clones)

            self.assertEqual(slim.get_variables(), [])
            clones = model_deploy.create_clones(deploy_config, model_fn,
                                                clone_args)
            self.assertEqual(len(slim.get_variables()), 5)
            for v in slim.get_variables():
                self.assertDeviceEqual(v.device, 'CPU:0')
                self.assertDeviceEqual(v.value().device, 'CPU:0')
            self.assertEqual(len(clones), num_clones)
            for i, clone in enumerate(clones):
                self.assertEqual(
                    clone.outputs.op.name,
                    'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % i)
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                               clone.scope)
                self.assertEqual(len(update_ops), 2)
                self.assertEqual(clone.scope, 'clone_%d/' % i)
                self.assertDeviceEqual(clone.device, 'GPU:%d' % i)
Пример #4
0
    def testCreateOnecloneWithPS(self):
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(0)
            tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
            tf_labels = tf.constant(self._labels, dtype=tf.float32)

            model_fn = BatchNormClassifier
            clone_args = (tf_inputs, tf_labels)
            deploy_config = model_deploy.DeploymentConfig(num_clones=1,
                                                          num_ps_tasks=1)

            self.assertEqual(slim.get_variables(), [])
            clones = model_deploy.create_clones(deploy_config, model_fn,
                                                clone_args)
            self.assertEqual(len(clones), 1)
            clone = clones[0]
            self.assertEqual(clone.outputs.op.name,
                             'BatchNormClassifier/fully_connected/Sigmoid')
            self.assertDeviceEqual(clone.device, '/job:worker/device:GPU:0')
            self.assertEqual(clone.scope, '')
            self.assertEqual(len(slim.get_variables()), 5)
            for v in slim.get_variables():
                self.assertDeviceEqual(v.device, '/job:ps/task:0/CPU:0')
                self.assertDeviceEqual(v.device, v.value().device)
Пример #5
0
  def testCreateOnecloneWithPS(self):
    g = tf.Graph()
    with g.as_default():
      tf.set_random_seed(0)
      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
      tf_labels = tf.constant(self._labels, dtype=tf.float32)

      model_fn = BatchNormClassifier
      model_args = (tf_inputs, tf_labels)
      deploy_config = model_deploy.DeploymentConfig(num_clones=1,
                                                    num_ps_tasks=1)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, model_args)
      self.assertEqual(len(slim.get_variables()), 5)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(len(update_ops), 2)

      optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
      total_loss, grads_and_vars = model_deploy.optimize_clones(clones,
                                                                optimizer)
      self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
      self.assertEqual(total_loss.op.name, 'total_loss')
      for g, v in grads_and_vars:
        self.assertDeviceEqual(g.device, '/job:worker/device:GPU:0')
        self.assertDeviceEqual(v.device, '/job:ps/task:0/CPU:0')
Пример #6
0
    def testCreateLogisticClassifier(self):
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(0)
            tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
            tf_labels = tf.constant(self._labels, dtype=tf.float32)

            model_fn = LogisticClassifier
            clone_args = (tf_inputs, tf_labels)
            deploy_config = model_deploy.DeploymentConfig(num_clones=1)

            self.assertEqual(slim.get_variables(), [])
            clones = model_deploy.create_clones(deploy_config, model_fn,
                                                clone_args)
            clone = clones[0]
            self.assertEqual(len(slim.get_variables()), 2)
            for v in slim.get_variables():
                self.assertDeviceEqual(v.device, 'CPU:0')
                self.assertDeviceEqual(v.value().device, 'CPU:0')
            self.assertEqual(clone.outputs.op.name,
                             'LogisticClassifier/fully_connected/Sigmoid')
            self.assertEqual(clone.scope, '')
            self.assertDeviceEqual(clone.device, 'GPU:0')
            self.assertEqual(len(slim.losses.get_losses()), 1)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            self.assertEqual(update_ops, [])
Пример #7
0
  def testCreateMulticloneWithPS(self):
    g = tf.Graph()
    with g.as_default():
      tf.set_random_seed(0)
      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
      tf_labels = tf.constant(self._labels, dtype=tf.float32)

      model_fn = BatchNormClassifier
      clone_args = (tf_inputs, tf_labels)
      deploy_config = model_deploy.DeploymentConfig(num_clones=2,
                                                    num_ps_tasks=2)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      self.assertEqual(len(slim.get_variables()), 5)
      for i, v in enumerate(slim.get_variables()):
        t = i % 2
        self.assertDeviceEqual(v.device, '/job:ps/task:%d/device:CPU:0' % t)
        self.assertDeviceEqual(v.device, v.value().device)
      self.assertEqual(len(clones), 2)
      for i, clone in enumerate(clones):
        self.assertEqual(
            clone.outputs.op.name,
            'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % i)
        self.assertEqual(clone.scope, 'clone_%d/' % i)
        self.assertDeviceEqual(clone.device, '/job:worker/device:GPU:%d' % i)
Пример #8
0
  def testCreateMulticlone(self):
    g = tf.Graph()
    with g.as_default():
      tf.set_random_seed(0)
      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
      tf_labels = tf.constant(self._labels, dtype=tf.float32)

      model_fn = BatchNormClassifier
      clone_args = (tf_inputs, tf_labels)
      num_clones = 4
      deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      self.assertEqual(len(slim.get_variables()), 5)
      for v in slim.get_variables():
        self.assertDeviceEqual(v.device, 'CPU:0')
        self.assertDeviceEqual(v.value().device, 'CPU:0')
      self.assertEqual(len(clones), num_clones)
      for i, clone in enumerate(clones):
        self.assertEqual(
            clone.outputs.op.name,
            'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % i)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, clone.scope)
        self.assertEqual(len(update_ops), 2)
        self.assertEqual(clone.scope, 'clone_%d/' % i)
        self.assertDeviceEqual(clone.device, 'GPU:%d' % i)
Пример #9
0
  def testCreateLogisticClassifier(self):
    g = tf.Graph()
    with g.as_default():
      tf.set_random_seed(0)
      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
      tf_labels = tf.constant(self._labels, dtype=tf.float32)

      model_fn = LogisticClassifier
      clone_args = (tf_inputs, tf_labels)
      deploy_config = model_deploy.DeploymentConfig(num_clones=1)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      clone = clones[0]
      self.assertEqual(len(slim.get_variables()), 2)
      for v in slim.get_variables():
        self.assertDeviceEqual(v.device, 'CPU:0')
        self.assertDeviceEqual(v.value().device, 'CPU:0')
      self.assertEqual(clone.outputs.op.name,
                       'LogisticClassifier/fully_connected/Sigmoid')
      self.assertEqual(clone.scope, '')
      self.assertDeviceEqual(clone.device, 'GPU:0')
      self.assertEqual(len(slim.losses.get_losses()), 1)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(update_ops, [])
Пример #10
0
  def testCreateOnecloneWithPS(self):
    g = tf.Graph()
    with g.as_default():
      tf.set_random_seed(0)
      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
      tf_labels = tf.constant(self._labels, dtype=tf.float32)

      model_fn = BatchNormClassifier
      clone_args = (tf_inputs, tf_labels)
      deploy_config = model_deploy.DeploymentConfig(num_clones=1,
                                                    num_ps_tasks=1)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      self.assertEqual(len(clones), 1)
      clone = clones[0]
      self.assertEqual(clone.outputs.op.name,
                       'BatchNormClassifier/fully_connected/Sigmoid')
      self.assertDeviceEqual(clone.device, '/job:worker/device:GPU:0')
      self.assertEqual(clone.scope, '')
      self.assertEqual(len(slim.get_variables()), 5)
      for v in slim.get_variables():
        self.assertDeviceEqual(v.device, '/job:ps/task:0/CPU:0')
        self.assertDeviceEqual(v.device, v.value().device)
Пример #11
0
def main(_):
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    #######################
    # Config model_deploy #
    #######################
    deploy_config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.worker_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)

    # Create global_step
    with tf.device(deploy_config.variables_device()):
      global_step = slim.create_global_step()

    ######################
    # Select the dataset #
    ######################
    dataset = dataset_factory.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

    ######################
    # Select the network #
    ######################
    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        weight_decay=FLAGS.weight_decay,
        is_training=True)

    #####################################
    # Select the preprocessing function #
    #####################################
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name,
        is_training=True)

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    with tf.device(deploy_config.inputs_device()):
      provider = slim.dataset_data_provider.DatasetDataProvider(
          dataset,
          num_readers=FLAGS.num_readers,
          common_queue_capacity=20 * FLAGS.batch_size,
          common_queue_min=10 * FLAGS.batch_size)
      [image, label] = provider.get(['image', 'label'])
      label -= FLAGS.labels_offset

      train_image_size = FLAGS.train_image_size or network_fn.default_image_size

      image = image_preprocessing_fn(image, train_image_size, train_image_size)

      images, labels = tf.train.batch(
          [image, label],
          batch_size=FLAGS.batch_size,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=5 * FLAGS.batch_size)
      labels = slim.one_hot_encoding(
          labels, dataset.num_classes - FLAGS.labels_offset)
      batch_queue = slim.prefetch_queue.prefetch_queue(
          [images, labels], capacity=2 * deploy_config.num_clones)

    ####################
    # Define the model #
    ####################
    def clone_fn(batch_queue):
      """Allows data parallelism by creating multiple clones of network_fn."""
      images, labels = batch_queue.dequeue()
      logits, end_points = network_fn(images)

      #############################
      # Specify the loss function #
      #############################
      if 'AuxLogits' in end_points:
        slim.losses.softmax_cross_entropy(
            end_points['AuxLogits'], labels,
            label_smoothing=FLAGS.label_smoothing, weights=0.4,
            scope='aux_loss')
      slim.losses.softmax_cross_entropy(
          logits, labels, label_smoothing=FLAGS.label_smoothing, weights=1.0)
      return end_points

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
    first_clone_scope = deploy_config.clone_scope(0)
    # Gather update_ops from the first clone. These contain, for example,
    # the updates for the batch_norm variables created by network_fn.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Add summaries for end_points.
    end_points = clones[0].outputs
    for end_point in end_points:
      x = end_points[end_point]
      summaries.add(tf.summary.histogram('activations/' + end_point, x))
      summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                      tf.nn.zero_fraction(x)))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Add summaries for variables.
    for variable in slim.get_model_variables():
      summaries.add(tf.summary.histogram(variable.op.name, variable))

    #################################
    # Configure the moving averages #
    #################################
    if FLAGS.moving_average_decay:
      moving_average_variables = slim.get_model_variables()
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, global_step)
    else:
      moving_average_variables, variable_averages = None, None

    #########################################
    # Configure the optimization procedure. #
    #########################################
    with tf.device(deploy_config.optimizer_device()):
      learning_rate = _configure_learning_rate(dataset.num_samples, global_step)
      optimizer = _configure_optimizer(learning_rate)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    if FLAGS.sync_replicas:
      # If sync_replicas is enabled, the averaging will be done in the chief
      # queue runner.
      optimizer = tf.train.SyncReplicasOptimizer(
          opt=optimizer,
          replicas_to_aggregate=FLAGS.replicas_to_aggregate,
          total_num_replicas=FLAGS.worker_replicas,
          variable_averages=variable_averages,
          variables_to_average=moving_average_variables)
    elif FLAGS.moving_average_decay:
      # Update ops executed locally by trainer.
      update_ops.append(variable_averages.apply(moving_average_variables))

    # Variables to train.
    variables_to_train = _get_variables_to_train()

    #  and returns a train_tensor and summary_op
    total_loss, clones_gradients = model_deploy.optimize_clones(
        clones,
        optimizer,
        var_list=variables_to_train)
    # Add total_loss to summary.
    summaries.add(tf.summary.scalar('total_loss', total_loss))

    # Create gradient updates.
    grad_updates = optimizer.apply_gradients(clones_gradients,
                                             global_step=global_step)
    update_ops.append(grad_updates)

    update_op = tf.group(*update_ops)
    with tf.control_dependencies([update_op]):
      train_tensor = tf.identity(total_loss, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                       first_clone_scope))

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries), name='summary_op')


    ###########################
    # Kicks off the training. #
    ###########################
    slim.learning.train(
        train_tensor,
        logdir=FLAGS.train_dir,
        master=FLAGS.master,
        is_chief=(FLAGS.task == 0),
        init_fn=_get_init_fn(),
        summary_op=summary_op,
        number_of_steps=FLAGS.max_number_of_steps,
        log_every_n_steps=FLAGS.log_every_n_steps,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs,
        sync_optimizer=optimizer if FLAGS.sync_replicas else None)
Пример #12
0
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        #######################
        # Config model_deploy #
        #######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ######################
        # Select the network #
        ######################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image, label] = provider.get(['image', 'label'])
            label -= FLAGS.labels_offset

            train_image_size = FLAGS.train_image_size or network_fn.default_image_size

            image = image_preprocessing_fn(image, train_image_size,
                                           train_image_size)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            labels = slim.one_hot_encoding(
                labels, dataset.num_classes - FLAGS.labels_offset)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * deploy_config.num_clones)

        ####################
        # Define the model #
        ####################
        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple clones of network_fn."""
            images, labels = batch_queue.dequeue()
            logits, end_points = network_fn(images)

            #############################
            # Specify the loss function #
            #############################
            if 'AuxLogits' in end_points:
                slim.losses.softmax_cross_entropy(
                    end_points['AuxLogits'],
                    labels,
                    label_smoothing=FLAGS.label_smoothing,
                    weights=0.4,
                    scope='aux_loss')
            slim.losses.softmax_cross_entropy(
                logits,
                labels,
                label_smoothing=FLAGS.label_smoothing,
                weights=1.0)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn,
                                            [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))
            summaries.add(
                tf.summary.scalar('sparsity/' + end_point,
                                  tf.nn.zero_fraction(x)))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        #################################
        # Configure the moving averages #
        #################################
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset.num_samples,
                                                     global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.sync_replicas:
            # If sync_replicas is enabled, the averaging will be done in the chief
            # queue runner.
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                total_num_replicas=FLAGS.worker_replicas,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables)
        elif FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = _get_variables_to_train()

        #  and returns a train_tensor and summary_op
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
            train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        ###########################
        # Kicks off the training. #
        ###########################
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            sync_optimizer=optimizer if FLAGS.sync_replicas else None)